Diffstat (limited to 'arch')
-rw-r--r--  arch/sh/mm/Kconfig                    |  1
-rw-r--r--  arch/x86/Kconfig                      |  2
-rw-r--r--  arch/x86/include/asm/pgtable.h        | 17
-rw-r--r--  arch/x86/include/asm/pgtable_types.h  | 20
-rw-r--r--  arch/x86/mm/pgtable.c                 |  8
5 files changed, 45 insertions(+), 3 deletions(-)
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index cb8f992..0f7c852 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -111,6 +111,7 @@ config VSYSCALL
 config NUMA
 	bool "Non Uniform Memory Access (NUMA) Support"
 	depends on MMU && SYS_SUPPORTS_NUMA && EXPERIMENTAL
+	select ARCH_WANT_NUMA_VARIABLE_LOCALITY
 	default n
 	help
 	  Some SH systems have many various memories scattered around
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 65a872b..97f8c5a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,6 +22,8 @@ config X86
 	def_bool y
 	select HAVE_AOUT if X86_32
 	select HAVE_UNSTABLE_SCHED_CLOCK
+	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_WANTS_PROT_NUMA_PROT_NONE
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_PCSPKR_PLATFORM
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a1f780d..5199db2 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -404,7 +404,14 @@ static inline int pte_same(pte_t a, pte_t b)
 
 static inline int pte_present(pte_t a)
 {
-	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
+	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE |
+			       _PAGE_NUMA);
+}
+
+#define pte_accessible pte_accessible
+static inline int pte_accessible(pte_t a)
+{
+	return pte_flags(a) & _PAGE_PRESENT;
 }
 
 static inline int pte_hidden(pte_t pte)
@@ -420,7 +427,8 @@ static inline int pmd_present(pmd_t pmd)
 	 * the _PAGE_PSE flag will remain set at all times while the
 	 * _PAGE_PRESENT bit is clear).
 	 */
-	return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
+	return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE |
+				 _PAGE_NUMA);
 }
 
 static inline int pmd_none(pmd_t pmd)
@@ -479,6 +487,11 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
 
 static inline int pmd_bad(pmd_t pmd)
 {
+#ifdef CONFIG_NUMA_BALANCING
+	/* pmd_numa check */
+	if ((pmd_flags(pmd) & (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA)
+		return 0;
+#endif
 	return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
 }
 
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index ec8a1fc..3c32db8 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -64,6 +64,26 @@
 #define _PAGE_FILE	(_AT(pteval_t, 1) << _PAGE_BIT_FILE)
 #define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
 
+/*
+ * _PAGE_NUMA indicates that this page will trigger a numa hinting
+ * minor page fault to gather numa placement statistics (see
+ * pte_numa()). The bit picked (8) is within the range between
+ * _PAGE_FILE (6) and _PAGE_PROTNONE (8) bits. Therefore, it doesn't
+ * require changes to the swp entry format because that bit is always
+ * zero when the pte is not present.
+ *
+ * The bit picked must be always zero when the pmd is present and not
+ * present, so that we don't lose information when we set it while
+ * atomically clearing the present bit.
+ *
+ * Because we shared the same bit (8) with _PAGE_PROTNONE this can be
+ * interpreted as _PAGE_NUMA only in places that _PAGE_PROTNONE
+ * couldn't reach, like handle_mm_fault() (see access_error in
+ * arch/x86/mm/fault.c, the vma protection must not be PROT_NONE for
+ * handle_mm_fault() to be invoked).
+ */
+#define _PAGE_NUMA	_PAGE_PROTNONE
+
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |	\
 			 _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |	\
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 217eb70..e27fbf8 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -301,6 +301,13 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 
+/*
+ * Used to set accessed or dirty bits in the page table entries
+ * on other architectures. On x86, the accessed and dirty bits
+ * are tracked by hardware. However, do_wp_page calls this function
+ * to also make the pte writeable at the same time the dirty bit is
+ * set. In that case we do actually need to write the PTE.
+ */
 int ptep_set_access_flags(struct vm_area_struct *vma,
 			  unsigned long address, pte_t *ptep,
 			  pte_t entry, int dirty)
@@ -310,7 +317,6 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 	if (changed && dirty) {
 		*ptep = entry;
 		pte_update_defer(vma->vm_mm, address, ptep);
-		flush_tlb_page(vma, address);
 	}
 
 	return changed;
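
Background on how these bits are consumed: because _PAGE_NUMA aliases _PAGE_PROTNONE, a NUMA hinting entry is recognisable only as "bit 8 set while _PAGE_PRESENT is clear". A minimal sketch of that test, modelled on the generic pte_numa()/pmd_numa() helpers this series adds in include/asm-generic/pgtable.h (outside arch/, hence not part of this diff):

#ifdef CONFIG_NUMA_BALANCING
/*
 * An entry is a NUMA hinting entry only when _PAGE_NUMA is set and
 * _PAGE_PRESENT is clear. A plain PROT_NONE pte sets the same bit,
 * but is never confused with this case because PROT_NONE vmas are
 * rejected by access_error() before the hinting fault path runs.
 */
static inline int pte_numa(pte_t pte)
{
	return (pte_flags(pte) & (_PAGE_NUMA | _PAGE_PRESENT)) == _PAGE_NUMA;
}

static inline int pmd_numa(pmd_t pmd)
{
	return (pmd_flags(pmd) & (_PAGE_NUMA | _PAGE_PRESENT)) == _PAGE_NUMA;
}
#endif

The same condition is what the new pmd_bad() check above whitelists: a pmd with _PAGE_NUMA set but _PAGE_PRESENT clear is a pending hinting fault, not a corrupted page table.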
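Likewise, pte_accessible() lets TLB-flushing code ask "could the hardware have cached this entry?", a stricter question than pte_present(), which after this change also returns true for _PAGE_PROTNONE and _PAGE_NUMA entries the TLB can never hold. A sketch of the caller pattern it enables, modelled on the generic ptep_clear_flush() in mm/pgtable-generic.c:

	pte_t pte = ptep_get_and_clear(vma->vm_mm, address, ptep);
	/*
	 * Only an entry the TLB may actually have cached needs a
	 * flush; a PROT_NONE or NUMA hinting entry was never visible
	 * to the hardware walker.
	 */
	if (pte_accessible(pte))
		flush_tlb_page(vma, address);

The flush_tlb_page() removal in ptep_set_access_flags() rests on a related observation: that path only ever makes the pte more permissive, so a stale TLB entry can at worst trigger a spurious fault that the CPU resolves by re-walking the page tables.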