From b20ce5e03b936be077463015661dcf52be274e5b Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov"
Date: Fri, 15 Jan 2016 16:54:37 -0800
Subject: mm: prepare page_referenced() and page_idle to new THP refcounting

Both page_referenced() and page_idle_clear_pte_refs_one() assume that a
THP can only be mapped with a PMD, so there's no reason to look at PTEs
for PageTransHuge() pages.  That's not true anymore: a THP can be mapped
with PTEs too.

The patch removes the PageTransHuge() test from the functions and
open-codes the page table check.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Kirill A. Shutemov
Cc: Vladimir Davydov
Cc: Andrea Arcangeli
Cc: Hugh Dickins
Cc: Naoya Horiguchi
Cc: Sasha Levin
Cc: Minchan Kim
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/huge_mm.h |   5 ---
 include/linux/mm.h      |  23 ++++++----
 mm/huge_memory.c        |  73 ++++++++----------------------
 mm/page_idle.c          |  65 +++++++++++++++++++++++----
 mm/rmap.c               | 117 +++++++++++++++++++++++++++++++++---------------
 mm/util.c               |  14 ++++++
 6 files changed, 185 insertions(+), 112 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7aec5ee..72cd942 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -48,11 +48,6 @@ enum transparent_hugepage_flag {
 #endif
 };
 
-extern pmd_t *page_check_address_pmd(struct page *page,
-				     struct mm_struct *mm,
-				     unsigned long address,
-				     spinlock_t **ptl);
-
 #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
 #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
diff --git a/include/linux/mm.h b/include/linux/mm.h
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ static inline void page_mapcount_reset(struct page *page)
 	atomic_set(&(page)->_mapcount, -1);
 }
 
+int __page_mapcount(struct page *page);
+
 static inline int page_mapcount(struct page *page)
 {
-	int ret;
-
 	VM_BUG_ON_PAGE(PageSlab(page), page);
-	ret = atomic_read(&page->_mapcount) + 1;
-	if (PageCompound(page)) {
-		page = compound_head(page);
-		ret += atomic_read(compound_mapcount_ptr(page)) + 1;
-		if (PageDoubleMap(page))
-			ret--;
-	}
-	return ret;
+	if (unlikely(PageCompound(page)))
+		return __page_mapcount(page);
+	return atomic_read(&page->_mapcount) + 1;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int total_mapcount(struct page *page);
+#else
+static inline int total_mapcount(struct page *page)
+{
+	return page_mapcount(page);
 }
+#endif
 
 static inline int page_count(struct page *page)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f283cb7..ab544b1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1649,46 +1649,6 @@ bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
 	return false;
 }
 
-/*
- * This function returns whether a given @page is mapped onto the @address
- * in the virtual space of @mm.
- *
- * When it's true, this function returns *pmd with holding the page table lock
- * and passing it back to the caller via @ptl.
- * If it's false, returns NULL without holding the page table lock.
- */
-pmd_t *page_check_address_pmd(struct page *page,
-			      struct mm_struct *mm,
-			      unsigned long address,
-			      spinlock_t **ptl)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	if (address & ~HPAGE_PMD_MASK)
-		return NULL;
-
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		return NULL;
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		return NULL;
-	pmd = pmd_offset(pud, address);
-
-	*ptl = pmd_lock(mm, pmd);
-	if (!pmd_present(*pmd))
-		goto unlock;
-	if (pmd_page(*pmd) != page)
-		goto unlock;
-	if (pmd_trans_huge(*pmd))
-		return pmd;
-unlock:
-	spin_unlock(*ptl);
-	return NULL;
-}
-
 #define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
 
 int hugepage_madvise(struct vm_area_struct *vma,
@@ -3097,20 +3057,6 @@ static void unfreeze_page(struct anon_vma *anon_vma, struct page *page)
 	}
 }
 
-static int total_mapcount(struct page *page)
-{
-	int i, ret;
-
-	ret = compound_mapcount(page);
-	for (i = 0; i < HPAGE_PMD_NR; i++)
-		ret += atomic_read(&page[i]._mapcount) + 1;
-
-	if (PageDoubleMap(page))
-		ret -= HPAGE_PMD_NR;
-
-	return ret;
-}
-
 static int __split_huge_page_tail(struct page *head, int tail,
 		struct lruvec *lruvec, struct list_head *list)
 {
@@ -3211,6 +3157,25 @@ static void __split_huge_page(struct page *page, struct list_head *list)
 	}
 }
 
+int total_mapcount(struct page *page)
+{
+	int i, ret;
+
+	VM_BUG_ON_PAGE(PageTail(page), page);
+
+	if (likely(!PageCompound(page)))
+		return atomic_read(&page->_mapcount) + 1;
+
+	ret = compound_mapcount(page);
+	if (PageHuge(page))
+		return ret;
+	for (i = 0; i < HPAGE_PMD_NR; i++)
+		ret += atomic_read(&page[i]._mapcount) + 1;
+	if (PageDoubleMap(page))
+		ret -= HPAGE_PMD_NR;
+	return ret;
+}
+
 /*
  * This function splits huge page into normal pages. @page can point to any
  * subpage of huge page to split. Split doesn't change the position of @page.
diff --git a/mm/page_idle.c b/mm/page_idle.c
index 1c245d9..2c553ba9 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -56,23 +56,70 @@ static int page_idle_clear_pte_refs_one(struct page *page,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
+	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	bool referenced = false;
 
-	if (unlikely(PageTransHuge(page))) {
-		pmd = page_check_address_pmd(page, mm, addr, &ptl);
-		if (pmd) {
-			referenced = pmdp_clear_young_notify(vma, addr, pmd);
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		return SWAP_AGAIN;
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return SWAP_AGAIN;
+	pmd = pmd_offset(pud, addr);
+
+	if (pmd_trans_huge(*pmd)) {
+		ptl = pmd_lock(mm, pmd);
+		if (!pmd_present(*pmd))
+			goto unlock_pmd;
+		if (unlikely(!pmd_trans_huge(*pmd))) {
 			spin_unlock(ptl);
+			goto map_pte;
 		}
+
+		if (pmd_page(*pmd) != page)
+			goto unlock_pmd;
+
+		referenced = pmdp_clear_young_notify(vma, addr, pmd);
+		spin_unlock(ptl);
+		goto found;
+unlock_pmd:
+		spin_unlock(ptl);
+		return SWAP_AGAIN;
 	} else {
-		pte = page_check_address(page, mm, addr, &ptl, 0);
-		if (pte) {
-			referenced = ptep_clear_young_notify(vma, addr, pte);
-			pte_unmap_unlock(pte, ptl);
-		}
+		pmd_t pmde = *pmd;
+
+		barrier();
+		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
+			return SWAP_AGAIN;
+
 	}
+map_pte:
+	pte = pte_offset_map(pmd, addr);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return SWAP_AGAIN;
 	}
+
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+
+	if (!pte_present(*pte)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	/* THP can be referenced by any subpage */
+	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	referenced = ptep_clear_young_notify(vma, addr, pte);
+	pte_unmap_unlock(pte, ptl);
+found:
 	if (referenced) {
 		clear_page_idle(page);
 		/*
diff --git a/mm/rmap.c b/mm/rmap.c
index 31d8866..6127c00 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -814,58 +814,105 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	int referenced = 0;
 	struct page_referenced_arg *pra = arg;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
 
-	if (unlikely(PageTransHuge(page))) {
-		pmd_t *pmd;
-
-		/*
-		 * rmap might return false positives; we must filter
-		 * these out using page_check_address_pmd().
-		 */
-		pmd = page_check_address_pmd(page, mm, address, &ptl);
-		if (!pmd)
+	if (unlikely(PageHuge(page))) {
+		/* when pud is not present, pte will be NULL */
+		pte = huge_pte_offset(mm, address);
+		if (!pte)
 			return SWAP_AGAIN;
 
-		if (vma->vm_flags & VM_LOCKED) {
+		ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
+		goto check_pte;
+	}
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		return SWAP_AGAIN;
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return SWAP_AGAIN;
+	pmd = pmd_offset(pud, address);
+
+	if (pmd_trans_huge(*pmd)) {
+		int ret = SWAP_AGAIN;
+
+		ptl = pmd_lock(mm, pmd);
+		if (!pmd_present(*pmd))
+			goto unlock_pmd;
+		if (unlikely(!pmd_trans_huge(*pmd))) {
 			spin_unlock(ptl);
+			goto map_pte;
+		}
+
+		if (pmd_page(*pmd) != page)
+			goto unlock_pmd;
+
+		if (vma->vm_flags & VM_LOCKED) {
 			pra->vm_flags |= VM_LOCKED;
-			return SWAP_FAIL; /* To break the loop */
+			ret = SWAP_FAIL; /* To break the loop */
+			goto unlock_pmd;
 		}
 
 		if (pmdp_clear_flush_young_notify(vma, address, pmd))
 			referenced++;
 		spin_unlock(ptl);
+		goto found;
+unlock_pmd:
+		spin_unlock(ptl);
+		return ret;
 	} else {
-		pte_t *pte;
+		pmd_t pmde = *pmd;
 
-		/*
-		 * rmap might return false positives; we must filter
-		 * these out using page_check_address().
-		 */
-		pte = page_check_address(page, mm, address, &ptl, 0);
-		if (!pte)
+		barrier();
+		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
 			return SWAP_AGAIN;
+	}
+map_pte:
+	pte = pte_offset_map(pmd, address);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return SWAP_AGAIN;
+	}
 
-		if (vma->vm_flags & VM_LOCKED) {
-			pte_unmap_unlock(pte, ptl);
-			pra->vm_flags |= VM_LOCKED;
-			return SWAP_FAIL; /* To break the loop */
-		}
+	ptl = pte_lockptr(mm, pmd);
+check_pte:
+	spin_lock(ptl);
 
-		if (ptep_clear_flush_young_notify(vma, address, pte)) {
-			/*
-			 * Don't treat a reference through a sequentially read
-			 * mapping as such. If the page has been used in
-			 * another mapping, we will catch it; if this other
-			 * mapping is already gone, the unmap path will have
-			 * set PG_referenced or activated the page.
-			 */
-			if (likely(!(vma->vm_flags & VM_SEQ_READ)))
-				referenced++;
-		}
+	if (!pte_present(*pte)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	/* THP can be referenced by any subpage */
+	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	if (vma->vm_flags & VM_LOCKED) {
 		pte_unmap_unlock(pte, ptl);
+		pra->vm_flags |= VM_LOCKED;
+		return SWAP_FAIL; /* To break the loop */
 	}
 
+	if (ptep_clear_flush_young_notify(vma, address, pte)) {
+		/*
+		 * Don't treat a reference through a sequentially read
+		 * mapping as such. If the page has been used in
+		 * another mapping, we will catch it; if this other
+		 * mapping is already gone, the unmap path will have
+		 * set PG_referenced or activated the page.
+ */ + if (likely(!(vma->vm_flags & VM_SEQ_READ))) + referenced++; + } + pte_unmap_unlock(pte, ptl); + +found: if (referenced) clear_page_idle(page); if (test_and_clear_page_young(page)) @@ -912,7 +959,7 @@ int page_referenced(struct page *page, int ret; int we_locked = 0; struct page_referenced_arg pra = { - .mapcount = page_mapcount(page), + .mapcount = total_mapcount(page), .memcg = memcg, }; struct rmap_walk_control rwc = { diff --git a/mm/util.c b/mm/util.c index 8acb936..6d1f920 100644 --- a/mm/util.c +++ b/mm/util.c @@ -407,6 +407,20 @@ struct address_space *page_mapping(struct page *page) return mapping; } +/* Slow path of page_mapcount() for compound pages */ +int __page_mapcount(struct page *page) +{ + int ret; + + ret = atomic_read(&page->_mapcount) + 1; + page = compound_head(page); + ret += atomic_read(compound_mapcount_ptr(page)) + 1; + if (PageDoubleMap(page)) + ret--; + return ret; +} +EXPORT_SYMBOL_GPL(__page_mapcount); + int overcommit_ratio_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) -- cgit v1.1