diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 11 | ||||
-rw-r--r-- | mm/internal.h | 26 | ||||
-rw-r--r-- | mm/memcontrol.c | 73 | ||||
-rw-r--r-- | mm/mempolicy.c | 2 | ||||
-rw-r--r-- | mm/migrate.c | 31 | ||||
-rw-r--r-- | mm/swap.c | 42 | ||||
-rw-r--r-- | mm/vmscan.c | 149 |
7 files changed, 272 insertions, 62 deletions
@@ -209,5 +209,16 @@ config VIRT_TO_BUS def_bool y depends on !ARCH_NO_VIRT_TO_BUS +config UNEVICTABLE_LRU + bool "Add LRU list to track non-evictable pages" + default y + depends on MMU + help + Keeps unevictable pages off of the active and inactive pageout + lists, so kswapd will not waste CPU time or have its balancing + algorithms thrown off by scanning these pages. Selecting this + will use one page flag and increase the code size a little, + say Y unless you know what you are doing. + config MMU_NOTIFIER bool diff --git a/mm/internal.h b/mm/internal.h index 4e8e78b..3db17b2 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -39,8 +39,15 @@ static inline void __put_page(struct page *page) atomic_dec(&page->_count); } +/* + * in mm/vmscan.c: + */ extern int isolate_lru_page(struct page *page); +extern void putback_lru_page(struct page *page); +/* + * in mm/page_alloc.c + */ extern void __free_pages_bootmem(struct page *page, unsigned int order); /* @@ -54,6 +61,25 @@ static inline unsigned long page_order(struct page *page) return page_private(page); } +#ifdef CONFIG_UNEVICTABLE_LRU +/* + * unevictable_migrate_page() called only from migrate_page_copy() to + * migrate unevictable flag to new page. + * Note that the old page has been isolated from the LRU lists at this + * point so we don't need to worry about LRU statistics. + */ +static inline void unevictable_migrate_page(struct page *new, struct page *old) +{ + if (TestClearPageUnevictable(old)) + SetPageUnevictable(new); +} +#else +static inline void unevictable_migrate_page(struct page *new, struct page *old) +{ +} +#endif + + /* * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, * so all functions starting at paging_init should be marked __init diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 27e9e75..82c065e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -160,9 +160,10 @@ struct page_cgroup { struct mem_cgroup *mem_cgroup; int flags; }; -#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ -#define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ -#define PAGE_CGROUP_FLAG_FILE (0x4) /* page is file system backed */ +#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ +#define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ +#define PAGE_CGROUP_FLAG_FILE (0x4) /* page is file system backed */ +#define PAGE_CGROUP_FLAG_UNEVICTABLE (0x8) /* page is unevictableable */ static int page_cgroup_nid(struct page_cgroup *pc) { @@ -292,10 +293,14 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz, { int lru = LRU_BASE; - if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) - lru += LRU_ACTIVE; - if (pc->flags & PAGE_CGROUP_FLAG_FILE) - lru += LRU_FILE; + if (pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE) + lru = LRU_UNEVICTABLE; + else { + if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) + lru += LRU_ACTIVE; + if (pc->flags & PAGE_CGROUP_FLAG_FILE) + lru += LRU_FILE; + } MEM_CGROUP_ZSTAT(mz, lru) -= 1; @@ -308,10 +313,14 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, { int lru = LRU_BASE; - if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) - lru += LRU_ACTIVE; - if (pc->flags & PAGE_CGROUP_FLAG_FILE) - lru += LRU_FILE; + if (pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE) + lru = LRU_UNEVICTABLE; + else { + if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) + lru += LRU_ACTIVE; + if (pc->flags & PAGE_CGROUP_FLAG_FILE) + lru += LRU_FILE; + } MEM_CGROUP_ZSTAT(mz, lru) += 1; list_add(&pc->lru, &mz->lists[lru]); @@ -319,21 +328,31 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true); } -static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active) +static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru) { struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); - int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; - int file = pc->flags & PAGE_CGROUP_FLAG_FILE; - int lru = LRU_FILE * !!file + !!from; + int active = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; + int file = pc->flags & PAGE_CGROUP_FLAG_FILE; + int unevictable = pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE; + enum lru_list from = unevictable ? LRU_UNEVICTABLE : + (LRU_FILE * !!file + !!active); - MEM_CGROUP_ZSTAT(mz, lru) -= 1; + if (lru == from) + return; - if (active) - pc->flags |= PAGE_CGROUP_FLAG_ACTIVE; - else + MEM_CGROUP_ZSTAT(mz, from) -= 1; + + if (is_unevictable_lru(lru)) { pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE; + pc->flags |= PAGE_CGROUP_FLAG_UNEVICTABLE; + } else { + if (is_active_lru(lru)) + pc->flags |= PAGE_CGROUP_FLAG_ACTIVE; + else + pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE; + pc->flags &= ~PAGE_CGROUP_FLAG_UNEVICTABLE; + } - lru = LRU_FILE * !!file + !!active; MEM_CGROUP_ZSTAT(mz, lru) += 1; list_move(&pc->lru, &mz->lists[lru]); } @@ -351,7 +370,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) /* * This routine assumes that the appropriate zone's lru lock is already held */ -void mem_cgroup_move_lists(struct page *page, bool active) +void mem_cgroup_move_lists(struct page *page, enum lru_list lru) { struct page_cgroup *pc; struct mem_cgroup_per_zone *mz; @@ -374,7 +393,7 @@ void mem_cgroup_move_lists(struct page *page, bool active) if (pc) { mz = page_cgroup_zoneinfo(pc); spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_move_lists(pc, active); + __mem_cgroup_move_lists(pc, lru); spin_unlock_irqrestore(&mz->lru_lock, flags); } unlock_page_cgroup(page); @@ -472,12 +491,10 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, /* * TODO: play better with lumpy reclaim, grabbing anything. */ - if (PageActive(page) && !active) { - __mem_cgroup_move_lists(pc, true); - continue; - } - if (!PageActive(page) && active) { - __mem_cgroup_move_lists(pc, false); + if (PageUnevictable(page) || + (PageActive(page) && !active) || + (!PageActive(page) && active)) { + __mem_cgroup_move_lists(pc, page_lru(page)); continue; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 71b4749..36f4257 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2202,7 +2202,7 @@ static void gather_stats(struct page *page, void *private, int pte_dirty) if (PageSwapCache(page)) md->swapcache++; - if (PageActive(page)) + if (PageActive(page) || PageUnevictable(page)) md->active++; if (PageWriteback(page)) diff --git a/mm/migrate.c b/mm/migrate.c index c073274..b10237d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -53,14 +53,9 @@ int migrate_prep(void) return 0; } -static inline void move_to_lru(struct page *page) -{ - lru_cache_add_lru(page, page_lru(page)); - put_page(page); -} - /* - * Add isolated pages on the list back to the LRU. + * Add isolated pages on the list back to the LRU under page lock + * to avoid leaking evictable pages back onto unevictable list. * * returns the number of pages put back. */ @@ -72,7 +67,7 @@ int putback_lru_pages(struct list_head *l) list_for_each_entry_safe(page, page2, l, lru) { list_del(&page->lru); - move_to_lru(page); + putback_lru_page(page); count++; } return count; @@ -354,8 +349,11 @@ static void migrate_page_copy(struct page *newpage, struct page *page) SetPageReferenced(newpage); if (PageUptodate(page)) SetPageUptodate(newpage); - if (PageActive(page)) + if (TestClearPageActive(page)) { + VM_BUG_ON(PageUnevictable(page)); SetPageActive(newpage); + } else + unevictable_migrate_page(newpage, page); if (PageChecked(page)) SetPageChecked(newpage); if (PageMappedToDisk(page)) @@ -376,7 +374,6 @@ static void migrate_page_copy(struct page *newpage, struct page *page) #ifdef CONFIG_SWAP ClearPageSwapCache(page); #endif - ClearPageActive(page); ClearPagePrivate(page); set_page_private(page, 0); page->mapping = NULL; @@ -555,6 +552,10 @@ static int fallback_migrate_page(struct address_space *mapping, * * The new page will have replaced the old page if this function * is successful. + * + * Return value: + * < 0 - error code + * == 0 - success */ static int move_to_new_page(struct page *newpage, struct page *page) { @@ -617,9 +618,10 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, if (!newpage) return -ENOMEM; - if (page_count(page) == 1) + if (page_count(page) == 1) { /* page was freed from under us. So we are done. */ goto move_newpage; + } charge = mem_cgroup_prepare_migration(page, newpage); if (charge == -ENOMEM) { @@ -693,7 +695,6 @@ rcu_unlock: rcu_read_unlock(); unlock: - unlock_page(page); if (rc != -EAGAIN) { @@ -704,17 +705,19 @@ unlock: * restored. */ list_del(&page->lru); - move_to_lru(page); + putback_lru_page(page); } move_newpage: if (!charge) mem_cgroup_end_migration(newpage); + /* * Move the new page to the LRU. If migration was not successful * then this will free the page. */ - move_to_lru(newpage); + putback_lru_page(newpage); + if (result) { if (rc) *result = rc; @@ -115,7 +115,7 @@ static void pagevec_move_tail(struct pagevec *pvec) zone = pagezone; spin_lock(&zone->lru_lock); } - if (PageLRU(page) && !PageActive(page)) { + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { int lru = page_is_file_cache(page); list_move_tail(&page->lru, &zone->lru[lru].list); pgmoved++; @@ -136,7 +136,7 @@ static void pagevec_move_tail(struct pagevec *pvec) void rotate_reclaimable_page(struct page *page) { if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) && - PageLRU(page)) { + !PageUnevictable(page) && PageLRU(page)) { struct pagevec *pvec; unsigned long flags; @@ -157,7 +157,7 @@ void activate_page(struct page *page) struct zone *zone = page_zone(page); spin_lock_irq(&zone->lru_lock); - if (PageLRU(page) && !PageActive(page)) { + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { int file = page_is_file_cache(page); int lru = LRU_BASE + file; del_page_from_lru_list(zone, page, lru); @@ -166,7 +166,7 @@ void activate_page(struct page *page) lru += LRU_ACTIVE; add_page_to_lru_list(zone, page, lru); __count_vm_event(PGACTIVATE); - mem_cgroup_move_lists(page, true); + mem_cgroup_move_lists(page, lru); zone->recent_rotated[!!file]++; zone->recent_scanned[!!file]++; @@ -183,7 +183,8 @@ void activate_page(struct page *page) */ void mark_page_accessed(struct page *page) { - if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) { + if (!PageActive(page) && !PageUnevictable(page) && + PageReferenced(page) && PageLRU(page)) { activate_page(page); ClearPageReferenced(page); } else if (!PageReferenced(page)) { @@ -211,13 +212,38 @@ void __lru_cache_add(struct page *page, enum lru_list lru) void lru_cache_add_lru(struct page *page, enum lru_list lru) { if (PageActive(page)) { + VM_BUG_ON(PageUnevictable(page)); ClearPageActive(page); + } else if (PageUnevictable(page)) { + VM_BUG_ON(PageActive(page)); + ClearPageUnevictable(page); } - VM_BUG_ON(PageLRU(page) || PageActive(page)); + VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page)); __lru_cache_add(page, lru); } +/** + * add_page_to_unevictable_list - add a page to the unevictable list + * @page: the page to be added to the unevictable list + * + * Add page directly to its zone's unevictable list. To avoid races with + * tasks that might be making the page evictable, through eg. munlock, + * munmap or exit, while it's not on the lru, we want to add the page + * while it's locked or otherwise "invisible" to other tasks. This is + * difficult to do when using the pagevec cache, so bypass that. + */ +void add_page_to_unevictable_list(struct page *page) +{ + struct zone *zone = page_zone(page); + + spin_lock_irq(&zone->lru_lock); + SetPageUnevictable(page); + SetPageLRU(page); + add_page_to_lru_list(zone, page, LRU_UNEVICTABLE); + spin_unlock_irq(&zone->lru_lock); +} + /* * Drain pages out of the cpu's pagevecs. * Either "cpu" is the current CPU, and preemption has already been @@ -316,6 +342,7 @@ void release_pages(struct page **pages, int nr, int cold) if (PageLRU(page)) { struct zone *pagezone = page_zone(page); + if (pagezone != zone) { if (zone) spin_unlock_irqrestore(&zone->lru_lock, @@ -392,6 +419,7 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) { int i; struct zone *zone = NULL; + VM_BUG_ON(is_unevictable_lru(lru)); for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; @@ -403,6 +431,8 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) zone = pagezone; spin_lock_irq(&zone->lru_lock); } + VM_BUG_ON(PageActive(page)); + VM_BUG_ON(PageUnevictable(page)); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); if (is_active_lru(lru)) diff --git a/mm/vmscan.c b/mm/vmscan.c index a8347b6..154b9b6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -470,6 +470,79 @@ int remove_mapping(struct address_space *mapping, struct page *page) return 0; } +/** + * putback_lru_page - put previously isolated page onto appropriate LRU list + * @page: page to be put back to appropriate lru list + * + * Add previously isolated @page to appropriate LRU list. + * Page may still be unevictable for other reasons. + * + * lru_lock must not be held, interrupts must be enabled. + */ +#ifdef CONFIG_UNEVICTABLE_LRU +void putback_lru_page(struct page *page) +{ + int lru; + int active = !!TestClearPageActive(page); + + VM_BUG_ON(PageLRU(page)); + +redo: + ClearPageUnevictable(page); + + if (page_evictable(page, NULL)) { + /* + * For evictable pages, we can use the cache. + * In event of a race, worst case is we end up with an + * unevictable page on [in]active list. + * We know how to handle that. + */ + lru = active + page_is_file_cache(page); + lru_cache_add_lru(page, lru); + } else { + /* + * Put unevictable pages directly on zone's unevictable + * list. + */ + lru = LRU_UNEVICTABLE; + add_page_to_unevictable_list(page); + } + mem_cgroup_move_lists(page, lru); + + /* + * page's status can change while we move it among lru. If an evictable + * page is on unevictable list, it never be freed. To avoid that, + * check after we added it to the list, again. + */ + if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) { + if (!isolate_lru_page(page)) { + put_page(page); + goto redo; + } + /* This means someone else dropped this page from LRU + * So, it will be freed or putback to LRU again. There is + * nothing to do here. + */ + } + + put_page(page); /* drop ref from isolate */ +} + +#else /* CONFIG_UNEVICTABLE_LRU */ + +void putback_lru_page(struct page *page) +{ + int lru; + VM_BUG_ON(PageLRU(page)); + + lru = !!TestClearPageActive(page) + page_is_file_cache(page); + lru_cache_add_lru(page, lru); + mem_cgroup_move_lists(page, lru); + put_page(page); +} +#endif /* CONFIG_UNEVICTABLE_LRU */ + + /* * shrink_page_list() returns the number of reclaimed pages */ @@ -503,6 +576,12 @@ static unsigned long shrink_page_list(struct list_head *page_list, sc->nr_scanned++; + if (unlikely(!page_evictable(page, NULL))) { + unlock_page(page); + putback_lru_page(page); + continue; + } + if (!sc->may_swap && page_mapped(page)) goto keep_locked; @@ -602,7 +681,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, * possible for a page to have PageDirty set, but it is actually * clean (all its buffers are clean). This happens if the * buffers were written out directly, with submit_bh(). ext3 - * will do this, as well as the blockdev mapping. + * will do this, as well as the blockdev mapping. * try_to_release_page() will discover that cleanness and will * drop the buffers and mark the page clean - it can be freed. * @@ -650,6 +729,7 @@ activate_locked: /* Not a candidate for swapping, so reclaim swap space. */ if (PageSwapCache(page) && vm_swap_full()) remove_exclusive_swap_page_ref(page); + VM_BUG_ON(PageActive(page)); SetPageActive(page); pgactivate++; keep_locked: @@ -699,6 +779,14 @@ int __isolate_lru_page(struct page *page, int mode, int file) if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file)) return ret; + /* + * When this function is being called for lumpy reclaim, we + * initially look into all LRU pages, active, inactive and + * unevictable; only give shrink_page_list evictable pages. + */ + if (PageUnevictable(page)) + return ret; + ret = -EBUSY; if (likely(get_page_unless_zero(page))) { /* @@ -810,7 +898,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, /* else it is being freed elsewhere */ list_move(&cursor_page->lru, src); default: - break; + break; /* ! on LRU or wrong list */ } } } @@ -870,8 +958,9 @@ static unsigned long clear_active_flags(struct list_head *page_list, * Returns -EBUSY if the page was not on an LRU list. * * The returned page will have PageLRU() cleared. If it was found on - * the active list, it will have PageActive set. That flag may need - * to be cleared by the caller before letting the page go. + * the active list, it will have PageActive set. If it was found on + * the unevictable list, it will have the PageUnevictable bit set. That flag + * may need to be cleared by the caller before letting the page go. * * The vmstat statistic corresponding to the list on which the page was * found will be decremented. @@ -892,11 +981,10 @@ int isolate_lru_page(struct page *page) spin_lock_irq(&zone->lru_lock); if (PageLRU(page) && get_page_unless_zero(page)) { - int lru = LRU_BASE; + int lru = page_lru(page); ret = 0; ClearPageLRU(page); - lru += page_is_file_cache(page) + !!PageActive(page); del_page_from_lru_list(zone, page, lru); } spin_unlock_irq(&zone->lru_lock); @@ -1008,11 +1096,20 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, * Put back any unfreeable pages. */ while (!list_empty(&page_list)) { + int lru; page = lru_to_page(&page_list); VM_BUG_ON(PageLRU(page)); - SetPageLRU(page); list_del(&page->lru); - add_page_to_lru_list(zone, page, page_lru(page)); + if (unlikely(!page_evictable(page, NULL))) { + spin_unlock_irq(&zone->lru_lock); + putback_lru_page(page); + spin_lock_irq(&zone->lru_lock); + continue; + } + SetPageLRU(page); + lru = page_lru(page); + add_page_to_lru_list(zone, page, lru); + mem_cgroup_move_lists(page, lru); if (PageActive(page) && scan_global_lru(sc)) { int file = !!page_is_file_cache(page); zone->recent_rotated[file]++; @@ -1107,6 +1204,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, page = lru_to_page(&l_hold); list_del(&page->lru); + if (unlikely(!page_evictable(page, NULL))) { + putback_lru_page(page); + continue; + } + /* page_referenced clears PageReferenced */ if (page_mapping_inuse(page) && page_referenced(page, 0, sc->mem_cgroup)) @@ -1140,7 +1242,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, ClearPageActive(page); list_move(&page->lru, &zone->lru[lru].list); - mem_cgroup_move_lists(page, false); + mem_cgroup_move_lists(page, lru); pgmoved++; if (!pagevec_add(&pvec, page)) { __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); @@ -1286,7 +1388,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone, get_scan_ratio(zone, sc, percent); - for_each_lru(l) { + for_each_evictable_lru(l) { if (scan_global_lru(sc)) { int file = is_file_lru(l); int scan; @@ -1318,7 +1420,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone, while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) { - for_each_lru(l) { + for_each_evictable_lru(l) { if (nr[l]) { nr_to_scan = min(nr[l], (unsigned long)sc->swap_cluster_max); @@ -1875,8 +1977,8 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY) continue; - for_each_lru(l) { - /* For pass = 0 we don't shrink the active list */ + for_each_evictable_lru(l) { + /* For pass = 0, we don't shrink the active list */ if (pass == 0 && (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE)) continue; @@ -2213,3 +2315,24 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) return ret; } #endif + +#ifdef CONFIG_UNEVICTABLE_LRU +/* + * page_evictable - test whether a page is evictable + * @page: the page to test + * @vma: the VMA in which the page is or will be mapped, may be NULL + * + * Test whether page is evictable--i.e., should be placed on active/inactive + * lists vs unevictable list. + * + * Reasons page might not be evictable: + * TODO - later patches + */ +int page_evictable(struct page *page, struct vm_area_struct *vma) +{ + + /* TODO: test page [!]evictable conditions */ + + return 1; +} +#endif |