diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 2 | ||||
-rw-r--r-- | mm/memory-failure.c | 59 | ||||
-rw-r--r-- | mm/memory.c | 14 | ||||
-rw-r--r-- | mm/mempolicy.c | 13 | ||||
-rw-r--r-- | mm/page_alloc.c | 3 | ||||
-rw-r--r-- | mm/vmscan.c | 14 |
6 files changed, 61 insertions, 44 deletions
@@ -67,7 +67,7 @@ config DISCONTIGMEM config SPARSEMEM def_bool y - depends on SPARSEMEM_MANUAL + depends on (!SELECT_MEMORY_MODEL && ARCH_SPARSEMEM_ENABLE) || SPARSEMEM_MANUAL config FLATMEM def_bool y diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 729d4b1..dacc641 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -35,6 +35,7 @@ #include <linux/mm.h> #include <linux/page-flags.h> #include <linux/sched.h> +#include <linux/ksm.h> #include <linux/rmap.h> #include <linux/pagemap.h> #include <linux/swap.h> @@ -370,9 +371,6 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) int ret = FAILED; struct address_space *mapping; - if (!isolate_lru_page(p)) - page_cache_release(p); - /* * For anonymous pages we're done the only reference left * should be the one m_f() holds. @@ -498,30 +496,18 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn) */ static int me_swapcache_dirty(struct page *p, unsigned long pfn) { - int ret = FAILED; - ClearPageDirty(p); /* Trigger EIO in shmem: */ ClearPageUptodate(p); - if (!isolate_lru_page(p)) { - page_cache_release(p); - ret = DELAYED; - } - - return ret; + return DELAYED; } static int me_swapcache_clean(struct page *p, unsigned long pfn) { - int ret = FAILED; - - if (!isolate_lru_page(p)) { - page_cache_release(p); - ret = RECOVERED; - } delete_from_swap_cache(p); - return ret; + + return RECOVERED; } /* @@ -611,8 +597,6 @@ static struct page_state { { 0, 0, "unknown page state", me_unknown }, }; -#undef lru - static void action_result(unsigned long pfn, char *msg, int result) { struct page *page = NULL; @@ -629,13 +613,16 @@ static int page_action(struct page_state *ps, struct page *p, unsigned long pfn, int ref) { int result; + int count; result = ps->action(p, pfn); action_result(pfn, ps->msg, result); - if (page_count(p) != 1 + ref) + + count = page_count(p) - 1 - ref; + if (count != 0) printk(KERN_ERR "MCE %#lx: %s page still referenced by %d users\n", - pfn, ps->msg, page_count(p) - 1); + pfn, ps->msg, count); /* Could do more checks here if page looks ok */ /* @@ -661,12 +648,9 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn, int i; int kill = 1; - if (PageReserved(p) || PageCompound(p) || PageSlab(p)) + if (PageReserved(p) || PageCompound(p) || PageSlab(p) || PageKsm(p)) return; - if (!PageLRU(p)) - lru_add_drain_all(); - /* * This check implies we don't kill processes if their pages * are in the swap cache early. Those are always late kills. @@ -738,6 +722,7 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn, int __memory_failure(unsigned long pfn, int trapno, int ref) { + unsigned long lru_flag; struct page_state *ps; struct page *p; int res; @@ -775,6 +760,24 @@ int __memory_failure(unsigned long pfn, int trapno, int ref) } /* + * We ignore non-LRU pages for good reasons. + * - PG_locked is only well defined for LRU pages and a few others + * - to avoid races with __set_page_locked() + * - to avoid races with __SetPageSlab*() (and more non-atomic ops) + * The check (unnecessarily) ignores LRU pages being isolated and + * walked by the page reclaim code, however that's not a big loss. + */ + if (!PageLRU(p)) + lru_add_drain_all(); + lru_flag = p->flags & lru; + if (isolate_lru_page(p)) { + action_result(pfn, "non LRU", IGNORED); + put_page(p); + return -EBUSY; + } + page_cache_release(p); + + /* * Lock the page and wait for writeback to finish. * It's very difficult to mess with pages currently under IO * and in many cases impossible, so we just avoid it here. @@ -790,7 +793,7 @@ int __memory_failure(unsigned long pfn, int trapno, int ref) /* * Torn down by someone else? */ - if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { + if ((lru_flag & lru) && !PageSwapCache(p) && p->mapping == NULL) { action_result(pfn, "already truncated LRU", IGNORED); res = 0; goto out; @@ -798,7 +801,7 @@ int __memory_failure(unsigned long pfn, int trapno, int ref) res = -EBUSY; for (ps = error_states;; ps++) { - if ((p->flags & ps->mask) == ps->res) { + if (((p->flags | lru_flag)& ps->mask) == ps->res) { res = page_action(ps, p, pfn, ref); break; } diff --git a/mm/memory.c b/mm/memory.c index 7e91b5f..6ab19dd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -641,6 +641,7 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { + pte_t *orig_src_pte, *orig_dst_pte; pte_t *src_pte, *dst_pte; spinlock_t *src_ptl, *dst_ptl; int progress = 0; @@ -654,6 +655,8 @@ again: src_pte = pte_offset_map_nested(src_pmd, addr); src_ptl = pte_lockptr(src_mm, src_pmd); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); + orig_src_pte = src_pte; + orig_dst_pte = dst_pte; arch_enter_lazy_mmu_mode(); do { @@ -677,9 +680,9 @@ again: arch_leave_lazy_mmu_mode(); spin_unlock(src_ptl); - pte_unmap_nested(src_pte - 1); + pte_unmap_nested(orig_src_pte); add_mm_rss(dst_mm, rss[0], rss[1]); - pte_unmap_unlock(dst_pte - 1, dst_ptl); + pte_unmap_unlock(orig_dst_pte, dst_ptl); cond_resched(); if (addr != end) goto again; @@ -1820,10 +1823,10 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, token = pmd_pgtable(*pmd); do { - err = fn(pte, token, addr, data); + err = fn(pte++, token, addr, data); if (err) break; - } while (pte++, addr += PAGE_SIZE, addr != end); + } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); @@ -2539,7 +2542,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, } else if (PageHWPoison(page)) { ret = VM_FAULT_HWPOISON; delayacct_clear_flag(DELAYACCT_PF_SWAPIN); - goto out; + goto out_release; } lock_page(page); @@ -2611,6 +2614,7 @@ out_nomap: pte_unmap_unlock(page_table, ptl); out_page: unlock_page(page); +out_release: page_cache_release(page); return ret; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 7dd9d9f..4545d59 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1024,7 +1024,7 @@ static long do_mbind(unsigned long start, unsigned long len, err = migrate_prep(); if (err) - return err; + goto mpol_out; } { NODEMASK_SCRATCH(scratch); @@ -1039,10 +1039,9 @@ static long do_mbind(unsigned long start, unsigned long len, err = -ENOMEM; NODEMASK_SCRATCH_FREE(scratch); } - if (err) { - mpol_put(new); - return err; - } + if (err) + goto mpol_out; + vma = check_range(mm, start, end, nmask, flags | MPOL_MF_INVERT, &pagelist); @@ -1058,9 +1057,11 @@ static long do_mbind(unsigned long start, unsigned long len, if (!err && nr_failed && (flags & MPOL_MF_STRICT)) err = -EIO; - } + } else + putback_lru_pages(&pagelist); up_write(&mm->mmap_sem); + mpol_out: mpol_put(new); return err; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bf72055..cdcedf6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2183,7 +2183,7 @@ void show_free_areas(void) printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" " active_file:%lu inactive_file:%lu isolated_file:%lu\n" " unevictable:%lu" - " dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n" + " dirty:%lu writeback:%lu unstable:%lu\n" " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n" " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n", global_page_state(NR_ACTIVE_ANON), @@ -2196,7 +2196,6 @@ void show_free_areas(void) global_page_state(NR_FILE_DIRTY), global_page_state(NR_WRITEBACK), global_page_state(NR_UNSTABLE_NFS), - nr_blockdev_pages(), global_page_state(NR_FREE_PAGES), global_page_state(NR_SLAB_RECLAIMABLE), global_page_state(NR_SLAB_UNRECLAIMABLE), diff --git a/mm/vmscan.c b/mm/vmscan.c index 64e43889..777af57 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -544,6 +544,16 @@ redo: */ lru = LRU_UNEVICTABLE; add_page_to_unevictable_list(page); + /* + * When racing with an mlock clearing (page is + * unlocked), make sure that if the other thread does + * not observe our setting of PG_lru and fails + * isolation, we see PG_mlocked cleared below and move + * the page back to the evictable list. + * + * The other side is TestClearPageMlocked(). + */ + smp_mb(); } /* @@ -1088,7 +1098,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, int lumpy_reclaim = 0; while (unlikely(too_many_isolated(zone, file, sc))) { - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); /* We are about to die and free our memory. Return now. */ if (fatal_signal_pending(current)) @@ -1356,7 +1366,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, * IO, plus JVM can create lots of anon VM_EXEC pages, * so we ignore them here. */ - if ((vm_flags & VM_EXEC) && !PageAnon(page)) { + if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) { list_add(&page->lru, &l_active); continue; } |