diff options
author | Mel Gorman <mgorman@suse.de> | 2015-02-12 14:58:32 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-12 18:54:08 -0800 |
commit | 21d9ee3eda7792c45880b2f11bff8e95c9a061fb (patch) | |
tree | 2e20932b8f1526e6d1f48add9e818ed43d7be8ee /arch/powerpc | |
parent | 4d9424669946532be754a6e116618dcb58430cb4 (diff) | |
download | op-kernel-dev-21d9ee3eda7792c45880b2f11bff8e95c9a061fb.zip op-kernel-dev-21d9ee3eda7792c45880b2f11bff8e95c9a061fb.tar.gz |
mm: remove remaining references to NUMA hinting bits and helpers
This patch removes the NUMA PTE bits and associated helpers. As a
side-effect it increases the maximum possible swap space on x86-64.
One potential source of problems is races between the marking of PTEs
PROT_NONE, NUMA hinting faults and migration. It must be guaranteed that
a PTE being protected is not faulted in parallel, seen as a pte_none and
corrupting memory. The base case is safe but transhuge has had problems in
the past due to a different migration mechanism and a dependence on the page
lock to serialise migrations, so it warrants a closer look.
task_work hinting update            parallel fault
------------------------            --------------
change_pmd_range
  change_huge_pmd
    __pmd_trans_huge_lock
      pmdp_get_and_clear
                                    __handle_mm_fault
                                    pmd_none
                                      do_huge_pmd_anonymous_page
                                      read? pmd_lock blocks until hinting complete, fail !pmd_none test
                                      write? __do_huge_pmd_anonymous_page acquires pmd_lock, checks pmd_none
      pmd_modify
      set_pmd_at
task_work hinting update            parallel migration
------------------------            ------------------
change_pmd_range
  change_huge_pmd
    __pmd_trans_huge_lock
      pmdp_get_and_clear
                                    __handle_mm_fault
                                      do_huge_pmd_numa_page
                                        migrate_misplaced_transhuge_page
                                        pmd_lock waits for updates to complete, recheck pmd_same
      pmd_modify
      set_pmd_at
Both of those are safe and the case where a transhuge page is inserted
during a protection update is unchanged. The case where two processes try
migrating at the same time is unchanged by this series so should still be
ok. I could not find a case where we are accidentally depending on the
PTE not being cleared and flushed. If one is missed, it'll manifest as
corruption problems that start triggering shortly after this series is
merged and only happen when NUMA balancing is enabled.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dave Jones <davej@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kirill Shutemov <kirill.shutemov@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/powerpc')
-rw-r--r-- | arch/powerpc/include/asm/pgtable.h | 54 | ||||
-rw-r--r-- | arch/powerpc/include/asm/pte-common.h | 5 | ||||
-rw-r--r-- | arch/powerpc/include/asm/pte-hash64.h | 6 |
3 files changed, 1 insertions, 64 deletions
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 1146006..79fee2e 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -55,64 +55,12 @@ static inline int pmd_protnone(pmd_t pmd) { return pte_protnone(pmd_pte(pmd)); } - -static inline int pte_present(pte_t pte) -{ - return pte_val(pte) & _PAGE_NUMA_MASK; -} - -#define pte_present_nonuma pte_present_nonuma -static inline int pte_present_nonuma(pte_t pte) -{ - return pte_val(pte) & (_PAGE_PRESENT); -} - -#define ptep_set_numa ptep_set_numa -static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - if ((pte_val(*ptep) & _PAGE_PRESENT) == 0) - VM_BUG_ON(1); - - pte_update(mm, addr, ptep, _PAGE_PRESENT, _PAGE_NUMA, 0); - return; -} - -#define pmdp_set_numa pmdp_set_numa -static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) -{ - if ((pmd_val(*pmdp) & _PAGE_PRESENT) == 0) - VM_BUG_ON(1); - - pmd_hugepage_update(mm, addr, pmdp, _PAGE_PRESENT, _PAGE_NUMA); - return; -} - -/* - * Generic NUMA pte helpers expect pteval_t and pmdval_t types to exist - * which was inherited from x86. For the purposes of powerpc pte_basic_t and - * pmd_t are equivalent - */ -#define pteval_t pte_basic_t -#define pmdval_t pmd_t -static inline pteval_t ptenuma_flags(pte_t pte) -{ - return pte_val(pte) & _PAGE_NUMA_MASK; -} - -static inline pmdval_t pmdnuma_flags(pmd_t pmd) -{ - return pmd_val(pmd) & _PAGE_NUMA_MASK; -} - -# else +#endif /* CONFIG_NUMA_BALANCING */ static inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_PRESENT; } -#endif /* CONFIG_NUMA_BALANCING */ /* Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. 
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index 2aef9b7..c5a755e 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -104,11 +104,6 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | \ _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC) -#ifdef CONFIG_NUMA_BALANCING -/* Mask of bits that distinguish present and numa ptes */ -#define _PAGE_NUMA_MASK (_PAGE_NUMA|_PAGE_PRESENT) -#endif - /* * We define 2 sets of base prot bits, one for basic pages (ie, * cacheable kernel and user pages) and one for non cacheable diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h index 2505d8e..55aea0c 100644 --- a/arch/powerpc/include/asm/pte-hash64.h +++ b/arch/powerpc/include/asm/pte-hash64.h @@ -27,12 +27,6 @@ #define _PAGE_RW 0x0200 /* software: user write access allowed */ #define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */ -/* - * Used for tracking numa faults - */ -#define _PAGE_NUMA 0x00000010 /* Gather numa placement stats */ - - /* No separate kernel read-only */ #define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */ #define _PAGE_KERNEL_RO _PAGE_KERNEL_RW |