diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/bootmem.c | 9 | ||||
-rw-r--r-- | mm/migrate.c | 8 | ||||
-rw-r--r-- | mm/mmap.c | 34 | ||||
-rw-r--r-- | mm/nommu.c | 18 | ||||
-rw-r--r-- | mm/page-writeback.c | 5 | ||||
-rw-r--r-- | mm/page_alloc.c | 70 | ||||
-rw-r--r-- | mm/slab.c | 30 |
7 files changed, 133 insertions, 41 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c index d3e3bd2..d213fed 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -401,7 +401,7 @@ unsigned long __init free_all_bootmem (void) return(free_all_bootmem_core(NODE_DATA(0))); } -void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal) { bootmem_data_t *bdata; void *ptr; @@ -409,7 +409,14 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned list_for_each_entry(bdata, &bdata_list, list) if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0))) return(ptr); + return NULL; +} +void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) +{ + void *mem = __alloc_bootmem_nopanic(size,align,goal); + if (mem) + return mem; /* * Whoops, we cannot satisfy the allocation request. */ diff --git a/mm/migrate.c b/mm/migrate.c index 09f6e4a..d444229 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -16,8 +16,7 @@ #include <linux/module.h> #include <linux/swap.h> #include <linux/pagemap.h> -#include <linux/buffer_head.h> /* for try_to_release_page(), - buffer_heads_over_limit */ +#include <linux/buffer_head.h> #include <linux/mm_inline.h> #include <linux/pagevec.h> #include <linux/rmap.h> @@ -28,8 +27,6 @@ #include "internal.h" -#include "internal.h" - /* The maximum number of pages to take off the LRU for migration */ #define MIGRATE_CHUNK_SIZE 256 @@ -176,7 +173,6 @@ unlock_retry: retry: return -EAGAIN; } -EXPORT_SYMBOL(swap_page); /* * Remove references for a page and establish the new page with the correct @@ -234,7 +230,7 @@ int migrate_page_remove_references(struct page *newpage, if (!page_mapping(page) || page_count(page) != nr_refs || *radix_pointer != page) { write_unlock_irq(&mapping->tree_lock); - return 1; + return -EAGAIN; } /* @@ -121,14 +121,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin) * only call if we're about to fail. */ n = nr_free_pages(); + + /* + * Leave reserved pages. The pages are not for anonymous pages. + */ + if (n <= totalreserve_pages) + goto error; + else + n -= totalreserve_pages; + + /* + * Leave the last 3% for root + */ if (!cap_sys_admin) n -= n / 32; free += n; if (free > pages) return 0; - vm_unacct_memory(pages); - return -ENOMEM; + + goto error; } allowed = (totalram_pages - hugetlb_total_pages()) @@ -150,7 +162,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin) */ if (atomic_read(&vm_committed_space) < (long)allowed) return 0; - +error: vm_unacct_memory(pages); return -ENOMEM; @@ -220,6 +232,17 @@ asmlinkage unsigned long sys_brk(unsigned long brk) if (brk < mm->end_code) goto out; + + /* + * Check against rlimit here. If this check is done later after the test + * of oldbrk with newbrk then it can escape the test and let the data + * segment grow beyond its set limit the in case where the limit is + * not page aligned -Ram Gupta + */ + rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; + if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim) + goto out; + newbrk = PAGE_ALIGN(brk); oldbrk = PAGE_ALIGN(mm->brk); if (oldbrk == newbrk) @@ -232,11 +255,6 @@ asmlinkage unsigned long sys_brk(unsigned long brk) goto out; } - /* Check against rlimit.. */ - rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; - if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim) - goto out; - /* Check against existing mmap mappings. */ if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE)) goto out; @@ -1147,14 +1147,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin) * only call if we're about to fail. */ n = nr_free_pages(); + + /* + * Leave reserved pages. The pages are not for anonymous pages. + */ + if (n <= totalreserve_pages) + goto error; + else + n -= totalreserve_pages; + + /* + * Leave the last 3% for root + */ if (!cap_sys_admin) n -= n / 32; free += n; if (free > pages) return 0; - vm_unacct_memory(pages); - return -ENOMEM; + + goto error; } allowed = totalram_pages * sysctl_overcommit_ratio / 100; @@ -1175,7 +1187,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin) */ if (atomic_read(&vm_committed_space) < (long)allowed) return 0; - +error: vm_unacct_memory(pages); return -ENOMEM; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6dcce3a..75d7f48 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -72,13 +72,12 @@ int dirty_background_ratio = 10; int vm_dirty_ratio = 40; /* - * The interval between `kupdate'-style writebacks, in centiseconds - * (hundredths of a second) + * The interval between `kupdate'-style writebacks, in jiffies */ int dirty_writeback_interval = 5 * HZ; /* - * The longest number of centiseconds for which data is allowed to remain dirty + * The longest number of jiffies for which data is allowed to remain dirty */ int dirty_expire_interval = 30 * HZ; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index dc523a1..97d6827 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -51,6 +51,7 @@ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL; EXPORT_SYMBOL(node_possible_map); unsigned long totalram_pages __read_mostly; unsigned long totalhigh_pages __read_mostly; +unsigned long totalreserve_pages __read_mostly; long nr_swap_pages; int percpu_pagelist_fraction; @@ -151,7 +152,8 @@ static void bad_page(struct page *page) 1 << PG_reclaim | 1 << PG_slab | 1 << PG_swapcache | - 1 << PG_writeback ); + 1 << PG_writeback | + 1 << PG_buddy ); set_page_count(page, 0); reset_page_mapcount(page); page->mapping = NULL; @@ -236,12 +238,12 @@ static inline unsigned long page_order(struct page *page) { static inline void set_page_order(struct page *page, int order) { set_page_private(page, order); - __SetPagePrivate(page); + __SetPageBuddy(page); } static inline void rmv_page_order(struct page *page) { - __ClearPagePrivate(page); + __ClearPageBuddy(page); set_page_private(page, 0); } @@ -280,11 +282,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order) * This function checks whether a page is free && is the buddy * we can do coalesce a page and its buddy if * (a) the buddy is not in a hole && - * (b) the buddy is free && - * (c) the buddy is on the buddy system && - * (d) a page and its buddy have the same order. - * for recording page's order, we use page_private(page) and PG_private. + * (b) the buddy is in the buddy system && + * (c) a page and its buddy have the same order. + * + * For recording whether a page is in the buddy system, we use PG_buddy. + * Setting, clearing, and testing PG_buddy is serialized by zone->lock. * + * For recording page's order, we use page_private(page). */ static inline int page_is_buddy(struct page *page, int order) { @@ -293,10 +297,10 @@ static inline int page_is_buddy(struct page *page, int order) return 0; #endif - if (PagePrivate(page) && - (page_order(page) == order) && - page_count(page) == 0) + if (PageBuddy(page) && page_order(page) == order) { + BUG_ON(page_count(page) != 0); return 1; + } return 0; } @@ -313,7 +317,7 @@ static inline int page_is_buddy(struct page *page, int order) * as necessary, plus some accounting needed to play nicely with other * parts of the VM system. * At each level, we keep a list of pages, which are heads of continuous - * free pages of length of (1 << order) and marked with PG_Private.Page's + * free pages of length of (1 << order) and marked with PG_buddy. Page's * order is recorded in page_private(page) field. * So when we are allocating or freeing one, we can derive the state of the * other. That is, if we allocate a small block, and both were @@ -376,7 +380,8 @@ static inline int free_pages_check(struct page *page) 1 << PG_slab | 1 << PG_swapcache | 1 << PG_writeback | - 1 << PG_reserved )))) + 1 << PG_reserved | + 1 << PG_buddy )))) bad_page(page); if (PageDirty(page)) __ClearPageDirty(page); @@ -524,7 +529,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) 1 << PG_slab | 1 << PG_swapcache | 1 << PG_writeback | - 1 << PG_reserved )))) + 1 << PG_reserved | + 1 << PG_buddy )))) bad_page(page); /* @@ -2472,6 +2478,38 @@ void __init page_alloc_init(void) } /* + * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio + * or min_free_kbytes changes. + */ +static void calculate_totalreserve_pages(void) +{ + struct pglist_data *pgdat; + unsigned long reserve_pages = 0; + int i, j; + + for_each_online_pgdat(pgdat) { + for (i = 0; i < MAX_NR_ZONES; i++) { + struct zone *zone = pgdat->node_zones + i; + unsigned long max = 0; + + /* Find valid and maximum lowmem_reserve in the zone */ + for (j = i; j < MAX_NR_ZONES; j++) { + if (zone->lowmem_reserve[j] > max) + max = zone->lowmem_reserve[j]; + } + + /* we treat pages_high as reserved pages. */ + max += zone->pages_high; + + if (max > zone->present_pages) + max = zone->present_pages; + reserve_pages += max; + } + } + totalreserve_pages = reserve_pages; +} + +/* * setup_per_zone_lowmem_reserve - called whenever * sysctl_lower_zone_reserve_ratio changes. Ensures that each zone * has a correct pages reserved value, so an adequate number of @@ -2502,6 +2540,9 @@ static void setup_per_zone_lowmem_reserve(void) } } } + + /* update totalreserve_pages */ + calculate_totalreserve_pages(); } /* @@ -2556,6 +2597,9 @@ void setup_per_zone_pages_min(void) zone->pages_high = zone->pages_min + tmp / 2; spin_unlock_irqrestore(&zone->lru_lock, flags); } + + /* update totalreserve_pages */ + calculate_totalreserve_pages(); } /* @@ -420,6 +420,7 @@ struct kmem_cache { unsigned long max_freeable; unsigned long node_allocs; unsigned long node_frees; + unsigned long node_overflow; atomic_t allochit; atomic_t allocmiss; atomic_t freehit; @@ -465,6 +466,7 @@ struct kmem_cache { #define STATS_INC_ERR(x) ((x)->errors++) #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) #define STATS_INC_NODEFREES(x) ((x)->node_frees++) +#define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++) #define STATS_SET_FREEABLE(x, i) \ do { \ if ((x)->max_freeable < i) \ @@ -484,6 +486,7 @@ struct kmem_cache { #define STATS_INC_ERR(x) do { } while (0) #define STATS_INC_NODEALLOCS(x) do { } while (0) #define STATS_INC_NODEFREES(x) do { } while (0) +#define STATS_INC_ACOVERFLOW(x) do { } while (0) #define STATS_SET_FREEABLE(x, i) do { } while (0) #define STATS_INC_ALLOCHIT(x) do { } while (0) #define STATS_INC_ALLOCMISS(x) do { } while (0) @@ -1453,7 +1456,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) int i; flags |= cachep->gfpflags; +#ifndef CONFIG_MMU + /* nommu uses slab's for process anonymous memory allocations, so + * requires __GFP_COMP to properly refcount higher order allocations" + */ + page = alloc_pages_node(nodeid, (flags | __GFP_COMP), cachep->gfporder); +#else page = alloc_pages_node(nodeid, flags, cachep->gfporder); +#endif if (!page) return NULL; addr = page_address(page); @@ -2318,13 +2328,15 @@ EXPORT_SYMBOL(kmem_cache_destroy); /* Get the memory for a slab management obj. */ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, - int colour_off, gfp_t local_flags) + int colour_off, gfp_t local_flags, + int nodeid) { struct slab *slabp; if (OFF_SLAB(cachep)) { /* Slab management obj is off-slab. */ - slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags); + slabp = kmem_cache_alloc_node(cachep->slabp_cache, + local_flags, nodeid); if (!slabp) return NULL; } else { @@ -2334,6 +2346,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, slabp->inuse = 0; slabp->colouroff = colour_off; slabp->s_mem = objp + colour_off; + slabp->nodeid = nodeid; return slabp; } @@ -2519,7 +2532,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) goto failed; /* Get slab management. */ - slabp = alloc_slabmgmt(cachep, objp, offset, local_flags); + slabp = alloc_slabmgmt(cachep, objp, offset, local_flags, nodeid); if (!slabp) goto opps1; @@ -3080,9 +3093,11 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) if (l3->alien && l3->alien[nodeid]) { alien = l3->alien[nodeid]; spin_lock(&alien->lock); - if (unlikely(alien->avail == alien->limit)) + if (unlikely(alien->avail == alien->limit)) { + STATS_INC_ACOVERFLOW(cachep); __drain_alien_cache(cachep, alien, nodeid); + } alien->entry[alien->avail++] = objp; spin_unlock(&alien->lock); } else { @@ -3760,7 +3775,7 @@ static void print_slabinfo_header(struct seq_file *m) seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); #if STATS seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> " - "<error> <maxfreeable> <nodeallocs> <remotefrees>"); + "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>"); seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>"); #endif seq_putc(m, '\n'); @@ -3874,11 +3889,12 @@ static int s_show(struct seq_file *m, void *p) unsigned long max_freeable = cachep->max_freeable; unsigned long node_allocs = cachep->node_allocs; unsigned long node_frees = cachep->node_frees; + unsigned long overflows = cachep->node_overflow; seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ - %4lu %4lu %4lu %4lu", allocs, high, grown, + %4lu %4lu %4lu %4lu %4lu", allocs, high, grown, reaped, errors, max_freeable, node_allocs, - node_frees); + node_frees, overflows); } /* cpu stats */ { |