From 4bbd4c776a63a063546552de42f6a535395f6d9e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 4 Jun 2014 16:08:10 -0700 Subject: mm: move get_user_pages()-related code to separate file mm/memory.c is overloaded: over 4k lines. get_user_pages() code is pretty much self-contained, so let's move it to a separate file. No other changes made. Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/internal.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'mm/internal.h') diff --git a/mm/internal.h b/mm/internal.h index 07b6736..6ee580d 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -169,6 +169,11 @@ static inline unsigned long page_order(struct page *page) return page_private(page); } +static inline bool is_cow_mapping(vm_flags_t flags) +{ + return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; +} + /* mm/util.c */ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, struct rb_node *rb_parent); -- cgit v1.1 From e0b9daeb453e602a95ea43853dc12d385558ce1f Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 4 Jun 2014 16:08:28 -0700 Subject: mm, compaction: embed migration mode in compact_control We're going to want to manipulate the migration mode for compaction in the page allocator, and currently compact_control's sync field is only a bool. Currently, we only do MIGRATE_ASYNC or MIGRATE_SYNC_LIGHT compaction depending on the value of this bool. Convert the bool to enum migrate_mode and pass the migration mode in directly. Later, we'll want to avoid MIGRATE_SYNC_LIGHT for thp allocations in the pagefault patch to avoid unnecessary latency. This also alters compaction triggered from sysfs, either for the entire system or for a node, to force MIGRATE_SYNC. 
[akpm@linux-foundation.org: fix build] [iamjoonsoo.kim@lge.com: use MIGRATE_SYNC in alloc_contig_range()] Signed-off-by: David Rientjes Suggested-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Greg Thelen Cc: Naoya Horiguchi Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/internal.h') diff --git a/mm/internal.h b/mm/internal.h index 6ee580d..a25424a 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -134,7 +134,7 @@ struct compact_control { unsigned long nr_migratepages; /* Number of pages to migrate */ unsigned long free_pfn; /* isolate_freepages search base */ unsigned long migrate_pfn; /* isolate_migratepages search base */ - bool sync; /* Synchronous migration */ + enum migrate_mode mode; /* Async or sync migration mode */ bool ignore_skip_hint; /* Scan blocks even if marked skip */ bool finished_update_free; /* True when the zone cached pfns are * no longer being updated -- cgit v1.1 From bea04b073292b2acb522c7c1aa67a4fc58151530 Mon Sep 17 00:00:00 2001 From: Jianyu Zhan Date: Wed, 4 Jun 2014 16:09:51 -0700 Subject: mm: use the light version __mod_zone_page_state in mlocked_vma_newpage() mlocked_vma_newpage() is called with pte lock held (a spinlock), which implies preemption disabled, and the vm stat counter is not modified from interrupt context, so we need not use an irq-safe mod_zone_page_state() here, using a light-weight version __mod_zone_page_state() would be OK. This patch also documents __mod_zone_page_state() and some of its callsites. The comment above __mod_zone_page_state() is from Hugh Dickins, and acked by Christoph. Most credits to Hugh and Christoph for the clarification on the usage of the __mod_zone_page_state(). 
[akpm@linux-foundation.org: coding-style fixes] Suggested-by: Andrew Morton Acked-by: Hugh Dickins Signed-off-by: Jianyu Zhan Reviewed-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/internal.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'mm/internal.h') diff --git a/mm/internal.h b/mm/internal.h index a25424a..e067984 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -201,7 +201,12 @@ static inline int mlocked_vma_newpage(struct vm_area_struct *vma, return 0; if (!TestSetPageMlocked(page)) { - mod_zone_page_state(page_zone(page), NR_MLOCK, + /* + * We use the irq-unsafe __mod_zone_page_stat because this + * counter is not modified from interrupt context, and the pte + * lock is held(spinlock), which implies preemption disabled. + */ + __mod_zone_page_state(page_zone(page), NR_MLOCK, hpage_nr_pages(page)); count_vm_event(UNEVICTABLE_PGMLOCKED); } -- cgit v1.1 From 7ee07a44eb53374a73544ae14c71366a02d462e0 Mon Sep 17 00:00:00 2001 From: Jianyu Zhan Date: Wed, 4 Jun 2014 16:09:52 -0700 Subject: mm: fold mlocked_vma_newpage() into its only call site In the previous commit (mm: use the light version __mod_zone_page_state in mlocked_vma_newpage()) an irq-unsafe __mod_zone_page_state is used. As suggested by Andrew, to reduce the risk of new call sites incorrectly using mlocked_vma_newpage() without knowing they are introducing a race, this patch folds mlocked_vma_newpage() into its only call site, page_add_new_anon_rmap, to make it open-coded so that people know what is going on. 
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Jianyu Zhan Suggested-by: Andrew Morton Suggested-by: Hugh Dickins Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/internal.h | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'mm/internal.h') diff --git a/mm/internal.h b/mm/internal.h index e067984..802c3a4 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -189,31 +189,6 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma) } /* - * Called only in fault path, to determine if a new page is being - * mapped into a LOCKED vma. If it is, mark page as mlocked. - */ -static inline int mlocked_vma_newpage(struct vm_area_struct *vma, - struct page *page) -{ - VM_BUG_ON_PAGE(PageLRU(page), page); - - if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) - return 0; - - if (!TestSetPageMlocked(page)) { - /* - * We use the irq-unsafe __mod_zone_page_stat because this - * counter is not modified from interrupt context, and the pte - * lock is held(spinlock), which implies preemption disabled. - */ - __mod_zone_page_state(page_zone(page), NR_MLOCK, - hpage_nr_pages(page)); - count_vm_event(UNEVICTABLE_PGMLOCKED); - } - return 1; -} - -/* * must be called with vma's mmap_sem held for read or write, and page locked. 
*/ extern void mlock_vma_page(struct page *page); @@ -255,10 +230,6 @@ extern unsigned long vma_address(struct page *page, struct vm_area_struct *vma); #endif #else /* !CONFIG_MMU */ -static inline int mlocked_vma_newpage(struct vm_area_struct *v, struct page *p) -{ - return 0; -} static inline void clear_page_mlock(struct page *page) { } static inline void mlock_vma_page(struct page *page) { } static inline void mlock_migrate_page(struct page *new, struct page *old) { } -- cgit v1.1 From be9765722e6b7ece8263cbab857490332339bd6f Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 4 Jun 2014 16:10:41 -0700 Subject: mm, compaction: properly signal and act upon lock and need_sched() contention Compaction uses compact_checklock_irqsave() function to periodically check for lock contention and need_resched() to either abort async compaction, or to free the lock, schedule and retake the lock. When aborting, cc->contended is set to signal the contended state to the caller. Two problems have been identified in this mechanism. First, compaction also calls directly cond_resched() in both scanners when no lock is yet taken. This call neither aborts async compaction, nor sets cc->contended appropriately. This patch introduces a new compact_should_abort() function to achieve both. In isolate_freepages(), the check frequency is reduced to once per SWAP_CLUSTER_MAX pageblocks to match what the migration scanner does in the preliminary page checks. In case a pageblock is found suitable for calling isolate_freepages_block(), the checks within there are done at a higher frequency. Second, isolate_freepages() does not check if isolate_freepages_block() aborted due to contention, and advances to the next pageblock. This violates the principle of aborting on contention, and might result in pageblocks not being scanned completely, since the scanning cursor is advanced. This problem has been noticed in the code by Joonsoo Kim when reviewing related patches. 
This patch makes isolate_freepages_block() check the cc->contended flag and abort. In case isolate_freepages() has already isolated some pages before aborting due to contention, page migration will proceed, which is OK since we do not want to waste the work that has been done, and page migration has own checks for contention. However, we do not want another isolation attempt by either of the scanners, so cc->contended flag check is added also to compaction_alloc() and compact_finished() to make sure compaction is aborted right after the migration. The outcome of the patch should be reduced lock contention by async compaction and lower latencies for higher-order allocations where direct compaction is involved. [akpm@linux-foundation.org: fix typo in comment] Reported-by: Joonsoo Kim Signed-off-by: Vlastimil Babka Reviewed-by: Naoya Horiguchi Cc: Minchan Kim Cc: Mel Gorman Cc: Bartlomiej Zolnierkiewicz Cc: Michal Nazarewicz Cc: Christoph Lameter Cc: Rik van Riel Acked-by: Michal Nazarewicz Tested-by: Shawn Guo Tested-by: Kevin Hilman Tested-by: Stephen Warren Tested-by: Fabio Estevam Cc: David Rientjes Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/internal.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'mm/internal.h') diff --git a/mm/internal.h b/mm/internal.h index 802c3a4..7f22a11f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -144,7 +144,10 @@ struct compact_control { int order; /* order a direct compactor needs */ int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; - bool contended; /* True if a lock was contended */ + bool contended; /* True if a lock was contended, or + * need_resched() true during async + * compaction + */ }; unsigned long -- cgit v1.1