From 486cf46f3f9be5f2a966016c1a8fe01e32cde09e Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 19 Oct 2011 12:50:35 -0700 Subject: mm: fix race between mremap and removing migration entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I don't usually pay much attention to the stale "? " addresses in stack backtraces, but this lucky report from Pawel Sikora hints that mremap's move_ptes() has inadequate locking against page migration. 3.0 BUG_ON(!PageLocked(p)) in migration_entry_to_page(): kernel BUG at include/linux/swapops.h:105! RIP: 0010:[] [] migration_entry_wait+0x156/0x160 [] handle_pte_fault+0xae1/0xaf0 [] ? __pte_alloc+0x42/0x120 [] ? do_huge_pmd_anonymous_page+0xab/0x310 [] handle_mm_fault+0x181/0x310 [] ? vma_adjust+0x537/0x570 [] do_page_fault+0x11d/0x4e0 [] ? do_mremap+0x2d5/0x570 [] page_fault+0x1f/0x30 mremap's down_write of mmap_sem, together with i_mmap_mutex or lock, and pagetable locks, were good enough before page migration (with its requirement that every migration entry be found) came in, and enough while migration always held mmap_sem; but not enough nowadays, when there's memory hotremove and compaction. The danger is that move_ptes() lets a migration entry dodge around behind remove_migration_pte()'s back, so it's in the old location when looking at the new, then in the new location when looking at the old. Either mremap's move_ptes() must additionally take anon_vma lock(), or migration's remove_migration_pte() must stop peeking for is_swap_entry() before it takes pagetable lock. Consensus chooses the latter: we prefer to add overhead to migration than to mremapping, which gets used by JVMs and by exec stack setup. Reported-and-tested-by: Paweł Sikora Signed-off-by: Hugh Dickins Acked-by: Andrea Arcangeli Acked-by: Mel Gorman Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- mm/migrate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'mm/migrate.c') diff --git a/mm/migrate.c b/mm/migrate.c index 666e4e6..14d0a6a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -120,10 +120,10 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, ptep = pte_offset_map(pmd, addr); - if (!is_swap_pte(*ptep)) { - pte_unmap(ptep); - goto out; - } + /* + * Peek to check is_swap_pte() before taking ptlock? No, we + * can race mremap's move_ptes(), which skips anon_vma lock. + */ ptl = pte_lockptr(mm, pmd); } -- cgit v1.1 From b95f1b31b75588306e32b2afd32166cad48f670b Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sun, 16 Oct 2011 02:01:52 -0400 Subject: mm: Map most files to use export.h instead of module.h The files changed within are only using the EXPORT_SYMBOL macro variants. They are not using core modular infrastructure and hence don't need module.h but only the export.h header. Signed-off-by: Paul Gortmaker --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/migrate.c') diff --git a/mm/migrate.c b/mm/migrate.c index 14d0a6a..459c41b1 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -13,7 +13,7 @@ */ #include -#include +#include #include #include #include -- cgit v1.1 From 0dabec93de633a87adfbbe1d800a4c56cd19d73b Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 31 Oct 2011 17:06:57 -0700 Subject: mm: migration: clean up unmap_and_move() unmap_and_move() is one a big messy function. Clean it up. Signed-off-by: Minchan Kim Reviewed-by: KOSAKI Motohiro Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Rik van Riel Cc: Michal Hocko Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 75 ++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 40 insertions(+), 35 deletions(-) (limited to 'mm/migrate.c') diff --git a/mm/migrate.c b/mm/migrate.c index 14d0a6a..33358f8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -621,38 +621,18 @@ static int move_to_new_page(struct page *newpage, struct page *page, return rc; } -/* - * Obtain the lock on page, remove all ptes and migrate the page - * to the newly allocated page in newpage. - */ -static int unmap_and_move(new_page_t get_new_page, unsigned long private, - struct page *page, int force, bool offlining, bool sync) +static int __unmap_and_move(struct page *page, struct page *newpage, + int force, bool offlining, bool sync) { - int rc = 0; - int *result = NULL; - struct page *newpage = get_new_page(page, private, &result); + int rc = -EAGAIN; int remap_swapcache = 1; int charge = 0; struct mem_cgroup *mem; struct anon_vma *anon_vma = NULL; - if (!newpage) - return -ENOMEM; - - if (page_count(page) == 1) { - /* page was freed from under us. So we are done. */ - goto move_newpage; - } - if (unlikely(PageTransHuge(page))) - if (unlikely(split_huge_page(page))) - goto move_newpage; - - /* prepare cgroup just returns 0 or -ENOMEM */ - rc = -EAGAIN; - if (!trylock_page(page)) { if (!force || !sync) - goto move_newpage; + goto out; /* * It's not safe for direct compaction to call lock_page. @@ -668,7 +648,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, * altogether. */ if (current->flags & PF_MEMALLOC) - goto move_newpage; + goto out; lock_page(page); } @@ -785,27 +765,52 @@ uncharge: mem_cgroup_end_migration(mem, page, newpage, rc == 0); unlock: unlock_page(page); +out: + return rc; +} -move_newpage: +/* + * Obtain the lock on page, remove all ptes and migrate the page + * to the newly allocated page in newpage. + */ +static int unmap_and_move(new_page_t get_new_page, unsigned long private, + struct page *page, int force, bool offlining, bool sync) +{ + int rc = 0; + int *result = NULL; + struct page *newpage = get_new_page(page, private, &result); + + if (!newpage) + return -ENOMEM; + + if (page_count(page) == 1) { + /* page was freed from under us. So we are done. */ + goto out; + } + + if (unlikely(PageTransHuge(page))) + if (unlikely(split_huge_page(page))) + goto out; + + rc = __unmap_and_move(page, newpage, force, offlining, sync); +out: if (rc != -EAGAIN) { - /* - * A page that has been migrated has all references - * removed and will be freed. A page that has not been - * migrated will have kepts its references and be - * restored. - */ - list_del(&page->lru); + /* + * A page that has been migrated has all references + * removed and will be freed. A page that has not been + * migrated will have kepts its references and be + * restored. + */ + list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); putback_lru_page(page); } - /* * Move the new page to the LRU. If migration was not successful * then this will free the page. */ putback_lru_page(newpage); - if (result) { if (rc) *result = rc; -- cgit v1.1