diff options
author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2015-10-09 08:32:21 +0530 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2015-10-12 15:30:09 +1100 |
commit | 891121e6c02c6242487aa4ea1d5c75b7ecdc45ee (patch) | |
tree | 118c5b4df68fe004438e4a1a629b04fbeed9b417 /arch/powerpc/mm | |
parent | ec2640b114d535ba7d895b6ee353791d542f2407 (diff) | |
download | op-kernel-dev-891121e6c02c6242487aa4ea1d5c75b7ecdc45ee.zip op-kernel-dev-891121e6c02c6242487aa4ea1d5c75b7ecdc45ee.tar.gz |
powerpc/mm: Differentiate between hugetlb and THP during page walk
We need to properly identify whether a hugepage is an explicit or
a transparent hugepage in follow_huge_addr(). We used to depend
on hugepage shift argument to do that. But in some case that can
result in wrong results. For ex:
On finding a transparent hugepage we set hugepage shift to PMD_SHIFT.
But we can end up clearing the thp pte, via pmdp_huge_get_and_clear.
We do prevent reusing the pfn page via the usage of
kick_all_cpus_sync(). But that happens after we updated the pte to 0.
Hence in follow_huge_addr() we can find hugepage shift set, but transparent
huge page check fail for a thp pte.
NOTE: We fixed a variant of this race against thp split in commit
691e95fd7396905a38d98919e9c150dbc3ea21a3
("powerpc/mm/thp: Make page table walk safe against thp split/collapse")
Without this patch, we may hit the BUG_ON(flags & FOLL_GET) in
follow_page_mask occasionally.
In the long term, we may want to switch ppc64 64k page size config to
enable CONFIG_ARCH_WANT_GENERAL_HUGETLB
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 7 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 21 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_hash64.c | 9 |
3 files changed, 25 insertions, 12 deletions
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index aee7017..7f9616f 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -994,6 +994,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap, unsigned long flags) { + bool is_thp; enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; unsigned long vsid; @@ -1068,7 +1069,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, #endif /* CONFIG_PPC_64K_PAGES */ /* Get PTE and page size from page tables */ - ptep = __find_linux_pte_or_hugepte(pgdir, ea, &hugeshift); + ptep = __find_linux_pte_or_hugepte(pgdir, ea, &is_thp, &hugeshift); if (ptep == NULL || !pte_present(*ptep)) { DBG_LOW(" no PTE !\n"); rc = 1; @@ -1088,7 +1089,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, } if (hugeshift) { - if (pmd_trans_huge(*(pmd_t *)ptep)) + if (is_thp) rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep, trap, flags, ssize, psize); #ifdef CONFIG_HUGETLB_PAGE @@ -1243,7 +1244,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, * THP pages use update_mmu_cache_pmd. We don't do * hash preload there. Hence can ignore THP here */ - ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugepage_shift); + ptep = find_linux_pte_or_hugepte(pgdir, ea, NULL, &hugepage_shift); if (!ptep) goto out_exit; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index f093828..9833fee 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -128,7 +128,7 @@ int pgd_huge(pgd_t pgd) pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { /* Only called for hugetlbfs pages, hence can ignore THP */ - return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL); + return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL); } static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, @@ -703,13 +703,14 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { + bool is_thp; pte_t *ptep, pte; unsigned shift; unsigned long mask, flags; struct page *page = ERR_PTR(-EINVAL); local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); + ptep = find_linux_pte_or_hugepte(mm->pgd, address, &is_thp, &shift); if (!ptep) goto no_page; pte = READ_ONCE(*ptep); @@ -718,7 +719,7 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) * Transparent hugepages are handled by generic code. We can skip them * here. */ - if (!shift || pmd_trans_huge(__pmd(pte_val(pte)))) + if (!shift || is_thp) goto no_page; if (!pte_present(pte)) { @@ -975,7 +976,7 @@ void flush_dcache_icache_hugepage(struct page *page) */ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, - unsigned *shift) + bool *is_thp, unsigned *shift) { pgd_t pgd, *pgdp; pud_t pud, *pudp; @@ -987,6 +988,9 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, if (shift) *shift = 0; + if (is_thp) + *is_thp = false; + pgdp = pgdir + pgd_index(ea); pgd = READ_ONCE(*pgdp); /* @@ -1034,7 +1038,14 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, if (pmd_none(pmd)) return NULL; - if (pmd_huge(pmd) || pmd_large(pmd)) { + if (pmd_trans_huge(pmd)) { + if (is_thp) + *is_thp = true; + ret_pte = (pte_t *) pmdp; + goto out; + } + + if (pmd_huge(pmd)) { ret_pte = (pte_t *) pmdp; goto out; } else if (is_hugepd(__hugepd(pmd_val(pmd)))) diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index c522969..f7b8039 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -190,6 +190,7 @@ void tlb_flush(struct mmu_gather *tlb) void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, unsigned long end) { + bool is_thp; int hugepage_shift; unsigned long flags; @@ -208,21 +209,21 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, local_irq_save(flags); arch_enter_lazy_mmu_mode(); for (; start < end; start += PAGE_SIZE) { - pte_t *ptep = find_linux_pte_or_hugepte(mm->pgd, start, + pte_t *ptep = find_linux_pte_or_hugepte(mm->pgd, start, &is_thp, &hugepage_shift); unsigned long pte; if (ptep == NULL) continue; pte = pte_val(*ptep); - if (hugepage_shift) + if (is_thp) trace_hugepage_invalidate(start, pte); if (!(pte & _PAGE_HASHPTE)) continue; - if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte))) + if (unlikely(is_thp)) hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte); else - hpte_need_flush(mm, start, ptep, pte, 0); + hpte_need_flush(mm, start, ptep, pte, hugepage_shift); } arch_leave_lazy_mmu_mode(); local_irq_restore(flags); |