diff options
author | Mel Gorman <mgorman@suse.de> | 2015-02-12 14:58:32 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-12 18:54:08 -0800 |
commit | 21d9ee3eda7792c45880b2f11bff8e95c9a061fb (patch) | |
tree | 2e20932b8f1526e6d1f48add9e818ed43d7be8ee /include | |
parent | 4d9424669946532be754a6e116618dcb58430cb4 (diff) | |
download | op-kernel-dev-21d9ee3eda7792c45880b2f11bff8e95c9a061fb.zip op-kernel-dev-21d9ee3eda7792c45880b2f11bff8e95c9a061fb.tar.gz |
mm: remove remaining references to NUMA hinting bits and helpers
This patch removes the NUMA PTE bits and associated helpers. As a
side-effect it increases the maximum possible swap space on x86-64.
One potential source of problems is races between the marking of PTEs
PROT_NONE, NUMA hinting faults and migration. It must be guaranteed that
a PTE being protected is not faulted in parallel, seen as a pte_none and
corrupting memory. The base case is safe but transhuge has problems in
the past due to an different migration mechanism and a dependance on page
lock to serialise migrations and warrants a closer look.
task_work hinting update parallel fault
------------------------ --------------
change_pmd_range
change_huge_pmd
__pmd_trans_huge_lock
pmdp_get_and_clear
__handle_mm_fault
pmd_none
do_huge_pmd_anonymous_page
read? pmd_lock blocks until hinting complete, fail !pmd_none test
write? __do_huge_pmd_anonymous_page acquires pmd_lock, checks pmd_none
pmd_modify
set_pmd_at
task_work hinting update parallel migration
------------------------ ------------------
change_pmd_range
change_huge_pmd
__pmd_trans_huge_lock
pmdp_get_and_clear
__handle_mm_fault
do_huge_pmd_numa_page
migrate_misplaced_transhuge_page
pmd_lock waits for updates to complete, recheck pmd_same
pmd_modify
set_pmd_at
Both of those are safe and the case where a transhuge page is inserted
during a protection update is unchanged. The case where two processes try
migrating at the same time is unchanged by this series so should still be
ok. I could not find a case where we are accidentally depending on the
PTE not being cleared and flushed. If one is missed, it'll manifest as
corruption problems that start triggering shortly after this series is
merged and only happen when NUMA balancing is enabled.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dave Jones <davej@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kirill Shutemov <kirill.shutemov@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/pgtable.h | 155 | ||||
-rw-r--r-- | include/linux/swapops.h | 2 |
2 files changed, 1 insertions, 156 deletions
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 067922c..4d46085 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -244,10 +244,6 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) # define pte_accessible(mm, pte) ((void)(pte), 1) #endif -#ifndef pte_present_nonuma -#define pte_present_nonuma(pte) pte_present(pte) -#endif - #ifndef flush_tlb_fix_spurious_fault #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address) #endif @@ -693,157 +689,6 @@ static inline int pmd_protnone(pmd_t pmd) } #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_NUMA_BALANCING -/* - * _PAGE_NUMA distinguishes between an unmapped page table entry, an entry that - * is protected for PROT_NONE and a NUMA hinting fault entry. If the - * architecture defines __PAGE_PROTNONE then it should take that into account - * but those that do not can rely on the fact that the NUMA hinting scanner - * skips inaccessible VMAs. - * - * pte/pmd_present() returns true if pte/pmd_numa returns true. Page - * fault triggers on those regions if pte/pmd_numa returns true - * (because _PAGE_PRESENT is not set). - */ -#ifndef pte_numa -static inline int pte_numa(pte_t pte) -{ - return ptenuma_flags(pte) == _PAGE_NUMA; -} -#endif - -#ifndef pmd_numa -static inline int pmd_numa(pmd_t pmd) -{ - return pmdnuma_flags(pmd) == _PAGE_NUMA; -} -#endif - -/* - * pte/pmd_mknuma sets the _PAGE_ACCESSED bitflag automatically - * because they're called by the NUMA hinting minor page fault. If we - * wouldn't set the _PAGE_ACCESSED bitflag here, the TLB miss handler - * would be forced to set it later while filling the TLB after we - * return to userland. That would trigger a second write to memory - * that we optimize away by setting _PAGE_ACCESSED here. - */ -#ifndef pte_mknonnuma -static inline pte_t pte_mknonnuma(pte_t pte) -{ - pteval_t val = pte_val(pte); - - val &= ~_PAGE_NUMA; - val |= (_PAGE_PRESENT|_PAGE_ACCESSED); - return __pte(val); -} -#endif - -#ifndef pmd_mknonnuma -static inline pmd_t pmd_mknonnuma(pmd_t pmd) -{ - pmdval_t val = pmd_val(pmd); - - val &= ~_PAGE_NUMA; - val |= (_PAGE_PRESENT|_PAGE_ACCESSED); - - return __pmd(val); -} -#endif - -#ifndef pte_mknuma -static inline pte_t pte_mknuma(pte_t pte) -{ - pteval_t val = pte_val(pte); - - VM_BUG_ON(!(val & _PAGE_PRESENT)); - - val &= ~_PAGE_PRESENT; - val |= _PAGE_NUMA; - - return __pte(val); -} -#endif - -#ifndef ptep_set_numa -static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - pte_t ptent = *ptep; - - ptent = pte_mknuma(ptent); - set_pte_at(mm, addr, ptep, ptent); - return; -} -#endif - -#ifndef pmd_mknuma -static inline pmd_t pmd_mknuma(pmd_t pmd) -{ - pmdval_t val = pmd_val(pmd); - - val &= ~_PAGE_PRESENT; - val |= _PAGE_NUMA; - - return __pmd(val); -} -#endif - -#ifndef pmdp_set_numa -static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) -{ - pmd_t pmd = *pmdp; - - pmd = pmd_mknuma(pmd); - set_pmd_at(mm, addr, pmdp, pmd); - return; -} -#endif -#else -static inline int pmd_numa(pmd_t pmd) -{ - return 0; -} - -static inline int pte_numa(pte_t pte) -{ - return 0; -} - -static inline pte_t pte_mknonnuma(pte_t pte) -{ - return pte; -} - -static inline pmd_t pmd_mknonnuma(pmd_t pmd) -{ - return pmd; -} - -static inline pte_t pte_mknuma(pte_t pte) -{ - return pte; -} - -static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - return; -} - - -static inline pmd_t pmd_mknuma(pmd_t pmd) -{ - return pmd; -} - -static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) -{ - return ; -} -#endif /* CONFIG_NUMA_BALANCING */ - #endif /* CONFIG_MMU */ #endif /* !__ASSEMBLY__ */ diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 831a316..cedf3d3 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -54,7 +54,7 @@ static inline pgoff_t swp_offset(swp_entry_t entry) /* check whether a pte points to a swap entry */ static inline int is_swap_pte(pte_t pte) { - return !pte_none(pte) && !pte_present_nonuma(pte); + return !pte_none(pte) && !pte_present(pte); } #endif |