From 688272809fcce5b17fcefd5892b59f3788efb144 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 7 Jun 2018 17:06:34 -0700 Subject: mm, gup: prevent pmd checking race in follow_pmd_mask() mmap_sem will be read locked when calling follow_pmd_mask(). But this cannot prevent PMD from being changed for all cases when PTL is unlocked, for example, from pmd_trans_huge() to pmd_none() via MADV_DONTNEED. So it is possible for the pmd_present() check in follow_pmd_mask() to encounter an invalid PMD. This may cause an incorrect VM_BUG_ON() or an infinite loop. Fix this by reading the PMD entry into a local variable with READ_ONCE() and checking the local variable and pmd_none() in the retry loop. As Kirill pointed out, with PTL unlocked, the *pmd may be changed under us, so reading it directly again and again may incur weird bugs. So although using *pmd directly other than for pmd_present() checking may be safe, it is still better to replace them to read *pmd once and check the local variable multiple times. When PTL unlocked, replace all *pmd with local variable was suggested by Kirill. Link: http://lkml.kernel.org/r/20180419083514.1365-1-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: Zi Yan Cc: "Kirill A. Shutemov" Cc: Al Viro Cc: "Aneesh Kumar K.V" Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 0101539..1020c7f 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -212,53 +212,69 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, unsigned int flags, unsigned int *page_mask) { - pmd_t *pmd; + pmd_t *pmd, pmdval; spinlock_t *ptl; struct page *page; struct mm_struct *mm = vma->vm_mm; pmd = pmd_offset(pudp, address); - if (pmd_none(*pmd)) + /* + * The READ_ONCE() will stabilize the pmdval in a register or + * on the stack so that it will stop changing under the code. + */ + pmdval = READ_ONCE(*pmd); + if (pmd_none(pmdval)) return no_page_table(vma, flags); - if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) { + if (pmd_huge(pmdval) && vma->vm_flags & VM_HUGETLB) { page = follow_huge_pmd(mm, address, pmd, flags); if (page) return page; return no_page_table(vma, flags); } - if (is_hugepd(__hugepd(pmd_val(*pmd)))) { + if (is_hugepd(__hugepd(pmd_val(pmdval)))) { page = follow_huge_pd(vma, address, - __hugepd(pmd_val(*pmd)), flags, + __hugepd(pmd_val(pmdval)), flags, PMD_SHIFT); if (page) return page; return no_page_table(vma, flags); } retry: - if (!pmd_present(*pmd)) { + if (!pmd_present(pmdval)) { if (likely(!(flags & FOLL_MIGRATION))) return no_page_table(vma, flags); VM_BUG_ON(thp_migration_supported() && - !is_pmd_migration_entry(*pmd)); - if (is_pmd_migration_entry(*pmd)) + !is_pmd_migration_entry(pmdval)); + if (is_pmd_migration_entry(pmdval)) pmd_migration_entry_wait(mm, pmd); + pmdval = READ_ONCE(*pmd); + /* + * MADV_DONTNEED may convert the pmd to null because + * mmap_sem is held in read mode + */ + if (pmd_none(pmdval)) + return no_page_table(vma, flags); goto retry; } - if (pmd_devmap(*pmd)) { + if (pmd_devmap(pmdval)) { ptl = pmd_lock(mm, pmd); page = follow_devmap_pmd(vma, address, pmd, flags); spin_unlock(ptl); if (page) return page; } - if (likely(!pmd_trans_huge(*pmd))) + if (likely(!pmd_trans_huge(pmdval))) return follow_page_pte(vma, address, pmd, flags); - if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) + if ((flags & FOLL_NUMA) && pmd_protnone(pmdval)) return no_page_table(vma, flags); retry_locked: ptl = pmd_lock(mm, pmd); + if (unlikely(pmd_none(*pmd))) { + spin_unlock(ptl); + return no_page_table(vma, flags); + } if (unlikely(!pmd_present(*pmd))) { spin_unlock(ptl); if (likely(!(flags & FOLL_MIGRATION))) -- cgit v1.1