mm: thp: add split tail pages to shrink page list in page reclaim

In page reclaim, huge page is split. split_huge_page() adds tail pages to LRU list. Since we are reclaiming a huge page, it's better we reclaim all subpages of the huge page instead of just the head page. This patch adds split tail pages to shrink page list so the tail pages can be reclaimed soon. Before this patch, run a swap workload: thp_fault_alloc 3492 thp_fault_fallback 608 thp_collapse_alloc 6 thp_collapse_alloc_failed 0 thp_split 916 With this patch: thp_fault_alloc 4085 thp_fault_fallback 16 thp_collapse_alloc 90 thp_collapse_alloc_failed 0 thp_split 1272 fallback allocation is reduced a lot. [akpm@linux-foundation.org: fix CONFIG_SWAP=n build] Signed-off-by: Shaohua Li <shli@fusionio.com> Acked-by: Rik van Riel <riel@redhat.com> Acked-by: Minchan Kim <minchan@kernel.org> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Wanpeng Li <liwanp@linux.vnet.ibm.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Shaohua Li <shli@kernel.org> 2013-04-29 15:08:36 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-04-29 15:54:38 -0700
commit: 5bc7b8aca942d03bf2716ddcfcb4e0b57e43a1b8 (patch)
tree: c76049e13755609ecbd4c4066fddd8bbfdd9f650 /mm
parent: 1eec6702a80e04416d528846a5ff2122484d95ec (diff)
download: op-kernel-dev-5bc7b8aca942d03bf2716ddcfcb4e0b57e43a1b8.zip
op-kernel-dev-5bc7b8aca942d03bf2716ddcfcb4e0b57e43a1b8.tar.gz
4 files changed, 26 insertions, 12 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 45eaae0..2ed1a16 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1559,7 +1559,8 @@ static int __split_huge_page_splitting(struct page *page,
 	return ret;
 }
 
-static void __split_huge_page_refcount(struct page *page)
+static void __split_huge_page_refcount(struct page *page,
+				       struct list_head *list)
 {
 	int i;
 	struct zone *zone = page_zone(page);
@@ -1645,7 +1646,7 @@ static void __split_huge_page_refcount(struct page *page)
 		BUG_ON(!PageDirty(page_tail));
 		BUG_ON(!PageSwapBacked(page_tail));
 
-		lru_add_page_tail(page, page_tail, lruvec);
+		lru_add_page_tail(page, page_tail, lruvec, list);
 	}
 	atomic_sub(tail_count, &page->_count);
 	BUG_ON(atomic_read(&page->_count) <= 0);
@@ -1752,7 +1753,8 @@ static int __split_huge_page_map(struct page *page,
 
 /* must be called with anon_vma->root->rwsem held */
 static void __split_huge_page(struct page *page,
-			      struct anon_vma *anon_vma)
+			      struct anon_vma *anon_vma,
+			      struct list_head *list)
 {
 	int mapcount, mapcount2;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -1783,7 +1785,7 @@ static void __split_huge_page(struct page *page,
 		       mapcount, page_mapcount(page));
 	BUG_ON(mapcount != page_mapcount(page));
 
-	__split_huge_page_refcount(page);
+	__split_huge_page_refcount(page, list);
 
 	mapcount2 = 0;
 	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
@@ -1798,7 +1800,14 @@ static void __split_huge_page(struct page *page,
 	BUG_ON(mapcount != mapcount2);
 }
 
-int split_huge_page(struct page *page)
+/*
+ * Split a hugepage into normal pages. This doesn't change the position of head
+ * page. If @list is null, tail pages will be added to LRU list, otherwise, to
+ * @list. Both head page and tail pages will inherit mapping, flags, and so on
+ * from the hugepage.
+ * Return 0 if the hugepage is split successfully otherwise return 1.
+ */
+int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
 	struct anon_vma *anon_vma;
 	int ret = 1;
@@ -1823,7 +1832,7 @@ int split_huge_page(struct page *page)
 		goto out_unlock;
 
 	BUG_ON(!PageSwapBacked(page));
-	__split_huge_page(page, anon_vma);
+	__split_huge_page(page, anon_vma, list);
 	count_vm_event(THP_SPLIT);
 
 	BUG_ON(PageCompound(page));
diff --git a/mm/swap.c b/mm/swap.c
index 8a529a0..acd40bf 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -737,7 +737,7 @@ EXPORT_SYMBOL(__pagevec_release);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /* used by __split_huge_page_refcount() */
 void lru_add_page_tail(struct page *page, struct page *page_tail,
-		       struct lruvec *lruvec)
+		       struct lruvec *lruvec, struct list_head *list)
 {
 	int uninitialized_var(active);
 	enum lru_list lru;
@@ -749,7 +749,8 @@ void lru_add_page_tail(struct page *page, struct page *page_tail,
 	VM_BUG_ON(NR_CPUS != 1 &&
 		  !spin_is_locked(&lruvec_zone(lruvec)->lru_lock));
 
-	SetPageLRU(page_tail);
+	if (!list)
+		SetPageLRU(page_tail);
 
 	if (page_evictable(page_tail)) {
 		if (PageActive(page)) {
@@ -767,7 +768,11 @@ void lru_add_page_tail(struct page *page, struct page *page_tail,
 
 	if (likely(PageLRU(page)))
 		list_add_tail(&page_tail->lru, &page->lru);
-	else {
+	else if (list) {
+		/* page reclaim is reclaiming a huge page */
+		get_page(page_tail);
+		list_add_tail(&page_tail->lru, list);
+	} else {
 		struct list_head *list_head;
 		/*
 		 * Head page has not yet been counted, as an hpage,
diff --git a/mm/swap_state.c b/mm/swap_state.c
index fe43fd5..b3d40dc 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -160,7 +160,7 @@ void __delete_from_swap_cache(struct page *page)
  * Allocate swap space for the page and add the page to the
  * swap cache.  Caller needs to hold the page lock. 
  */
-int add_to_swap(struct page *page)
+int add_to_swap(struct page *page, struct list_head *list)
 {
 	swp_entry_t entry;
 	int err;
@@ -173,7 +173,7 @@ int add_to_swap(struct page *page)
 		return 0;
 
 	if (unlikely(PageTransHuge(page)))
-		if (unlikely(split_huge_page(page))) {
+		if (unlikely(split_huge_page_to_list(page, list))) {
 			swapcache_free(entry, NULL);
 			return 0;
 		}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e53e495..fa6a853 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -781,7 +781,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (PageAnon(page) && !PageSwapCache(page)) {
 			if (!(sc->gfp_mask & __GFP_IO))
 				goto keep_locked;
-			if (!add_to_swap(page))
+			if (!add_to_swap(page, page_list))
 				goto activate_locked;
 			may_enter_fs = 1;
 		}
author	Shaohua Li <shli@kernel.org>	2013-04-29 15:08:36 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-04-29 15:54:38 -0700
commit	5bc7b8aca942d03bf2716ddcfcb4e0b57e43a1b8 (patch)
tree	c76049e13755609ecbd4c4066fddd8bbfdd9f650 /mm
parent	1eec6702a80e04416d528846a5ff2122484d95ec (diff)
download	op-kernel-dev-5bc7b8aca942d03bf2716ddcfcb4e0b57e43a1b8.zip op-kernel-dev-5bc7b8aca942d03bf2716ddcfcb4e0b57e43a1b8.tar.gz