diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-12-04 08:52:14 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-12-04 08:52:14 +0100 |
commit | cb9c34e6d090d376b77becaa5d29a65dec7f4272 (patch) | |
tree | 3678abce20d6825aebe3fec218057d4131e13fd6 /mm | |
parent | 470c66239ef0336429b35345f3f615d47341e13b (diff) | |
parent | 061e41fdb5047b1fb161e89664057835935ca1d2 (diff) | |
download | op-kernel-dev-cb9c34e6d090d376b77becaa5d29a65dec7f4272.zip op-kernel-dev-cb9c34e6d090d376b77becaa5d29a65dec7f4272.tar.gz |
Merge commit 'v2.6.28-rc7' into core/locking
Diffstat (limited to 'mm')
-rw-r--r-- | mm/hugetlb.c | 5 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 12 | ||||
-rw-r--r-- | mm/migrate.c | 5 | ||||
-rw-r--r-- | mm/mlock.c | 18 | ||||
-rw-r--r-- | mm/mmap.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 4 | ||||
-rw-r--r-- | mm/page_cgroup.c | 60 | ||||
-rw-r--r-- | mm/slub.c | 6 | ||||
-rw-r--r-- | mm/sparse.c | 2 | ||||
-rw-r--r-- | mm/vmalloc.c | 41 | ||||
-rw-r--r-- | mm/vmscan.c | 46 |
11 files changed, 103 insertions, 98 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d143ab6..6058b53 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1796,6 +1796,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page, unsigned long address) { + struct hstate *h = hstate_vma(vma); struct vm_area_struct *iter_vma; struct address_space *mapping; struct prio_tree_iter iter; @@ -1805,7 +1806,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, * vm_pgoff is in PAGE_SIZE units, hence the different calculation * from page cache lookup which is in HPAGE_SIZE units. */ - address = address & huge_page_mask(hstate_vma(vma)); + address = address & huge_page_mask(h); pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + (vma->vm_pgoff >> PAGE_SHIFT); mapping = (struct address_space *)page_private(page); @@ -1824,7 +1825,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, */ if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER)) unmap_hugepage_range(iter_vma, - address, address + HPAGE_SIZE, + address, address + huge_page_size(h), page); } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6837a10..b1737118 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -22,7 +22,6 @@ #include <linux/highmem.h> #include <linux/vmalloc.h> #include <linux/ioport.h> -#include <linux/cpuset.h> #include <linux/delay.h> #include <linux/migrate.h> #include <linux/page-isolation.h> @@ -190,7 +189,7 @@ static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn, pgdat->node_start_pfn; } -static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) +static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn) { struct pglist_data *pgdat = zone->zone_pgdat; int nr_pages = PAGES_PER_SECTION; @@ -217,7 +216,7 @@ static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) return 0; } -static int __add_section(struct zone *zone, unsigned long phys_start_pfn) +static int __meminit __add_section(struct zone *zone, unsigned long phys_start_pfn) { int nr_pages = PAGES_PER_SECTION; int ret; @@ -274,7 +273,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms) * call this function after deciding the zone to which to * add the new pages. */ -int __add_pages(struct zone *zone, unsigned long phys_start_pfn, +int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn, unsigned long nr_pages) { unsigned long i; @@ -471,7 +470,8 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat) } -int add_memory(int nid, u64 start, u64 size) +/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ +int __ref add_memory(int nid, u64 start, u64 size) { pg_data_t *pgdat = NULL; int new_pgdat = 0; @@ -498,8 +498,6 @@ int add_memory(int nid, u64 start, u64 size) /* we online node here. we can't roll back from here. */ node_set_online(nid); - cpuset_track_online_nodes(); - if (new_pgdat) { ret = register_one_node(nid); /* diff --git a/mm/migrate.c b/mm/migrate.c index 385db89..1e0d6b2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -522,15 +522,12 @@ static int writeout(struct address_space *mapping, struct page *page) remove_migration_ptes(page, page); rc = mapping->a_ops->writepage(page, &wbc); - if (rc < 0) - /* I/O Error writing */ - return -EIO; if (rc != AOP_WRITEPAGE_ACTIVATE) /* unlocked. Relock */ lock_page(page); - return -EAGAIN; + return (rc < 0) ? -EIO : -EAGAIN; } /* @@ -66,14 +66,10 @@ void __clear_page_mlock(struct page *page) putback_lru_page(page); } else { /* - * Page not on the LRU yet. Flush all pagevecs and retry. + * We lost the race. the page already moved to evictable list. */ - lru_add_drain_all(); - if (!isolate_lru_page(page)) - putback_lru_page(page); - else if (PageUnevictable(page)) + if (PageUnevictable(page)) count_vm_event(UNEVICTABLE_PGSTRANDED); - } } @@ -166,7 +162,7 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, unsigned long addr = start; struct page *pages[16]; /* 16 gives a reasonable batch */ int nr_pages = (end - start) / PAGE_SIZE; - int ret; + int ret = 0; int gup_flags = 0; VM_BUG_ON(start & ~PAGE_MASK); @@ -187,8 +183,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, if (vma->vm_flags & VM_WRITE) gup_flags |= GUP_FLAGS_WRITE; - lru_add_drain_all(); /* push cached pages to LRU */ - while (nr_pages > 0) { int i; @@ -251,8 +245,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, ret = 0; } - lru_add_drain_all(); /* to update stats */ - return ret; /* count entire vma as locked_vm */ } @@ -546,6 +538,8 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) if (!can_do_mlock()) return -EPERM; + lru_add_drain_all(); /* flush pagevec */ + down_write(¤t->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; @@ -612,6 +606,8 @@ asmlinkage long sys_mlockall(int flags) if (!can_do_mlock()) goto out; + lru_add_drain_all(); /* flush pagevec */ + down_write(¤t->mm->mmap_sem); lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; @@ -1704,7 +1704,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; - if (expand_stack(prev, addr)) + if (!prev || expand_stack(prev, addr)) return NULL; if (prev->vm_flags & VM_LOCKED) { if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 54069e6..d8ac014 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1561,6 +1561,10 @@ nofail_alloc: /* We now go into synchronous reclaim */ cpuset_memory_pressure_bump(); + /* + * The task's cpuset might have expanded its set of allowable nodes + */ + cpuset_update_task_memory_state(); p->flags |= PF_MEMALLOC; reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index f59d797..0b3cbf0 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -21,7 +21,7 @@ static unsigned long total_usage; #if !defined(CONFIG_SPARSEMEM) -void __init pgdat_page_cgroup_init(struct pglist_data *pgdat) +void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) { pgdat->node_page_cgroup = NULL; } @@ -97,7 +97,8 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) return section->page_cgroup + pfn; } -int __meminit init_section_page_cgroup(unsigned long pfn) +/* __alloc_bootmem...() is protected by !slab_available() */ +int __init_refok init_section_page_cgroup(unsigned long pfn) { struct mem_section *section; struct page_cgroup *base, *pc; @@ -106,19 +107,29 @@ int __meminit init_section_page_cgroup(unsigned long pfn) section = __pfn_to_section(pfn); - if (section->page_cgroup) - return 0; - - nid = page_to_nid(pfn_to_page(pfn)); - - table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; - if (slab_is_available()) { - base = kmalloc_node(table_size, GFP_KERNEL, nid); - if (!base) - base = vmalloc_node(table_size, nid); - } else { - base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), table_size, + if (!section->page_cgroup) { + nid = page_to_nid(pfn_to_page(pfn)); + table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; + if (slab_is_available()) { + base = kmalloc_node(table_size, GFP_KERNEL, nid); + if (!base) + base = vmalloc_node(table_size, nid); + } else { + base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), + table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + } + } else { + /* + * We don't have to allocate page_cgroup again, but + * address of memmap may be changed. So, we have to initialize + * again. + */ + base = section->page_cgroup + pfn; + table_size = 0; + /* check address of memmap is changed or not. */ + if (base->page == pfn_to_page(pfn)) + return 0; } if (!base) { @@ -158,14 +169,14 @@ void __free_page_cgroup(unsigned long pfn) } } -int online_page_cgroup(unsigned long start_pfn, +int __meminit online_page_cgroup(unsigned long start_pfn, unsigned long nr_pages, int nid) { unsigned long start, end, pfn; int fail = 0; - start = start_pfn & (PAGES_PER_SECTION - 1); + start = start_pfn & ~(PAGES_PER_SECTION - 1); end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) { @@ -183,12 +194,12 @@ int online_page_cgroup(unsigned long start_pfn, return -ENOMEM; } -int offline_page_cgroup(unsigned long start_pfn, +int __meminit offline_page_cgroup(unsigned long start_pfn, unsigned long nr_pages, int nid) { unsigned long start, end, pfn; - start = start_pfn & (PAGES_PER_SECTION - 1); + start = start_pfn & ~(PAGES_PER_SECTION - 1); end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) @@ -197,7 +208,7 @@ int offline_page_cgroup(unsigned long start_pfn, } -static int page_cgroup_callback(struct notifier_block *self, +static int __meminit page_cgroup_callback(struct notifier_block *self, unsigned long action, void *arg) { struct memory_notify *mn = arg; @@ -207,18 +218,23 @@ static int page_cgroup_callback(struct notifier_block *self, ret = online_page_cgroup(mn->start_pfn, mn->nr_pages, mn->status_change_nid); break; - case MEM_CANCEL_ONLINE: case MEM_OFFLINE: offline_page_cgroup(mn->start_pfn, mn->nr_pages, mn->status_change_nid); break; + case MEM_CANCEL_ONLINE: case MEM_GOING_OFFLINE: break; case MEM_ONLINE: case MEM_CANCEL_OFFLINE: break; } - ret = notifier_from_errno(ret); + + if (ret) + ret = notifier_from_errno(ret); + else + ret = NOTIFY_OK; + return ret; } @@ -248,7 +264,7 @@ void __init page_cgroup_init(void) " want\n"); } -void __init pgdat_page_cgroup_init(struct pglist_data *pgdat) +void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) { return; } @@ -2931,8 +2931,10 @@ static int slab_memory_callback(struct notifier_block *self, case MEM_CANCEL_OFFLINE: break; } - - ret = notifier_from_errno(ret); + if (ret) + ret = notifier_from_errno(ret); + else + ret = NOTIFY_OK; return ret; } diff --git a/mm/sparse.c b/mm/sparse.c index 39db301..083f5b6 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -570,7 +570,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap) * set. If this is <=0, then that means that the passed-in * map was not consumed and must be freed. */ -int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, +int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn, int nr_pages) { unsigned long section_nr = pfn_to_section_nr(start_pfn); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ba6b0f5..f3f6e07 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -77,7 +77,6 @@ static void vunmap_page_range(unsigned long addr, unsigned long end) BUG_ON(addr >= end); pgd = pgd_offset_k(addr); - flush_cache_vunmap(addr, end); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) @@ -324,14 +323,14 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, BUG_ON(size & ~PAGE_MASK); - addr = ALIGN(vstart, align); - va = kmalloc_node(sizeof(struct vmap_area), gfp_mask & GFP_RECLAIM_MASK, node); if (unlikely(!va)) return ERR_PTR(-ENOMEM); retry: + addr = ALIGN(vstart, align); + spin_lock(&vmap_area_lock); /* XXX: could have a last_hole cache */ n = vmap_area_root.rb_node; @@ -362,7 +361,7 @@ retry: goto found; } - while (addr + size >= first->va_start && addr + size <= vend) { + while (addr + size > first->va_start && addr + size <= vend) { addr = ALIGN(first->va_end + PAGE_SIZE, align); n = rb_next(&first->rb_node); @@ -522,24 +521,45 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, } /* + * Kick off a purge of the outstanding lazy areas. Don't bother if somebody + * is already purging. + */ +static void try_purge_vmap_area_lazy(void) +{ + unsigned long start = ULONG_MAX, end = 0; + + __purge_vmap_area_lazy(&start, &end, 0, 0); +} + +/* * Kick off a purge of the outstanding lazy areas. */ static void purge_vmap_area_lazy(void) { unsigned long start = ULONG_MAX, end = 0; - __purge_vmap_area_lazy(&start, &end, 0, 0); + __purge_vmap_area_lazy(&start, &end, 1, 0); } /* - * Free and unmap a vmap area + * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been + * called for the correct range previously. */ -static void free_unmap_vmap_area(struct vmap_area *va) +static void free_unmap_vmap_area_noflush(struct vmap_area *va) { va->flags |= VM_LAZY_FREE; atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages())) - purge_vmap_area_lazy(); + try_purge_vmap_area_lazy(); +} + +/* + * Free and unmap a vmap area + */ +static void free_unmap_vmap_area(struct vmap_area *va) +{ + flush_cache_vunmap(va->va_start, va->va_end); + free_unmap_vmap_area_noflush(va); } static struct vmap_area *find_vmap_area(unsigned long addr) @@ -723,7 +743,7 @@ static void free_vmap_block(struct vmap_block *vb) spin_unlock(&vmap_block_tree_lock); BUG_ON(tmp != vb); - free_unmap_vmap_area(vb->va); + free_unmap_vmap_area_noflush(vb->va); call_rcu(&vb->rcu_head, rcu_free_vb); } @@ -785,6 +805,9 @@ static void vb_free(const void *addr, unsigned long size) BUG_ON(size & ~PAGE_MASK); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); + + flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size); + order = get_order(size); offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1); diff --git a/mm/vmscan.c b/mm/vmscan.c index 3b58602..62e7f62 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -623,6 +623,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, * Try to allocate it some swap space here. */ if (PageAnon(page) && !PageSwapCache(page)) { + if (!(sc->gfp_mask & __GFP_IO)) + goto keep_locked; switch (try_to_munlock(page)) { case SWAP_FAIL: /* shouldn't happen */ case SWAP_AGAIN: @@ -634,6 +636,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, } if (!add_to_swap(page, GFP_ATOMIC)) goto activate_locked; + may_enter_fs = 1; } #endif /* CONFIG_SWAP */ @@ -1245,6 +1248,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, list_add(&page->lru, &l_inactive); } + spin_lock_irq(&zone->lru_lock); /* * Count referenced pages from currently used mappings as * rotated, even though they are moved to the inactive list. @@ -1260,7 +1264,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, pgmoved = 0; lru = LRU_BASE + file * LRU_FILE; - spin_lock_irq(&zone->lru_lock); while (!list_empty(&l_inactive)) { page = lru_to_page(&l_inactive); prefetchw_prev_lru_page(page, &l_inactive, flags); @@ -1386,9 +1389,9 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc, file_prio = 200 - sc->swappiness; /* - * anon recent_rotated[0] - * %anon = 100 * ----------- / ----------------- * IO cost - * anon + file rotate_sum + * The amount of pressure on anon vs file pages is inversely + * proportional to the fraction of recently scanned pages on + * each list that were recently referenced and in active use. */ ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1); ap /= zone->recent_rotated[0] + 1; @@ -2368,39 +2371,6 @@ int page_evictable(struct page *page, struct vm_area_struct *vma) return 1; } -static void show_page_path(struct page *page) -{ - char buf[256]; - if (page_is_file_cache(page)) { - struct address_space *mapping = page->mapping; - struct dentry *dentry; - pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); - - spin_lock(&mapping->i_mmap_lock); - dentry = d_find_alias(mapping->host); - printk(KERN_INFO "rescued: %s %lu\n", - dentry_path(dentry, buf, 256), pgoff); - spin_unlock(&mapping->i_mmap_lock); - } else { -#if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU) - struct anon_vma *anon_vma; - struct vm_area_struct *vma; - - anon_vma = page_lock_anon_vma(page); - if (!anon_vma) - return; - - list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { - printk(KERN_INFO "rescued: anon %s\n", - vma->vm_mm->owner->comm); - break; - } - page_unlock_anon_vma(anon_vma); -#endif - } -} - - /** * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list * @page: page to check evictability and move to appropriate lru list @@ -2421,8 +2391,6 @@ retry: if (page_evictable(page, NULL)) { enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page); - show_page_path(page); - __dec_zone_state(zone, NR_UNEVICTABLE); list_move(&page->lru, &zone->lru[l].list); __inc_zone_state(zone, NR_INACTIVE_ANON + l); |