author		Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>	2016-04-29 23:26:30 +1000
committer	Michael Ellerman <mpe@ellerman.id.au>	2016-05-11 21:53:57 +1000
commit		bde3eb6222e49673d2ee9dfdc6ab6e7b6ff69e91 (patch)
tree		197a0e6cd5c9c3919852e626acdb67fcba9d42a7 /arch
parent		3df33f12be2f6fa88dff12564ab1d069482e3224 (diff)
powerpc/mm/radix: Add radix THP callbacks
The deposited pgtable_t is a pte fragment, hence we cannot use page->lru
for linking them together. Instead, we use the first two 64-bit words of
the pte fragment as a list_head to link all deposited fragments together.
On withdraw we properly zero them out.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
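
[Editor's note] The trick described in the commit message is easy to model outside the kernel. The sketch below is a minimal userspace C analogue, not the kernel implementation: the toy list helpers mirror the kernel's list_head operations, and names such as deposit(), withdraw() and PTES_PER_FRAGMENT are invented for illustration. The real radix__pgtable_trans_huge_deposit()/withdraw() (in the diff further down) additionally assert the pmd lock and store the list head in pmd_huge_pte().

/* toy_deposit.c: userspace model of the radix THP deposit/withdraw trick */
#include <stdio.h>
#include <string.h>

/* Minimal circular doubly-linked list, modelled on the kernel's list_head. */
struct list_head {
	struct list_head *next, *prev;
};

static void INIT_LIST_HEAD(struct list_head *lh)
{
	lh->next = lh->prev = lh;
}

static void list_add(struct list_head *new, struct list_head *head)
{
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

static void list_del(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
}

static int list_empty(const struct list_head *head)
{
	return head->next == head;
}

/* A toy "pte fragment": an array of 64-bit entries whose first two
 * words (16 bytes) are borrowed as a list_head while deposited. */
typedef unsigned long long pte_t;
#define PTES_PER_FRAGMENT 8

static struct list_head *deposited;	/* stands in for pmd_huge_pte() */

static void deposit(pte_t *fragment)
{
	struct list_head *lh = (struct list_head *)fragment;

	if (!deposited)
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, deposited);
	deposited = lh;
}

static pte_t *withdraw(void)
{
	struct list_head *lh = deposited;

	if (!lh)
		return NULL;
	if (list_empty(lh))
		deposited = NULL;
	else {
		deposited = lh->next;
		list_del(lh);
	}
	/* Zero the two words that served as the list_head — the cleanup
	 * the commit message calls out for the withdraw path. */
	memset(lh, 0, sizeof(struct list_head));
	return (pte_t *)lh;
}

int main(void)
{
	pte_t a[PTES_PER_FRAGMENT] = {0}, b[PTES_PER_FRAGMENT] = {0};

	deposit(a);
	deposit(b);
	printf("first withdraw returns %s\n", withdraw() == b ? "b" : "a");
	printf("second withdraw returns %s\n", withdraw() == b ? "b" : "a");
	return 0;
}

In this two-fragment run, b (the fragment deposited last) comes back first, and each returned fragment has its first 16 bytes zeroed so it can serve as a clean page table again.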
Diffstat (limited to 'arch')
-rw-r--r--	arch/powerpc/include/asm/book3s/64/pgtable-64k.h |   2
-rw-r--r--	arch/powerpc/include/asm/book3s/64/pgtable.h     |  16
-rw-r--r--	arch/powerpc/include/asm/book3s/64/radix.h       |  22
-rw-r--r--	arch/powerpc/mm/pgtable-book3s64.c               |   2
-rw-r--r--	arch/powerpc/mm/pgtable-radix.c                  | 117
5 files changed, 158 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index 79331cf..793ecd5 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -71,6 +71,8 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 
 static inline pmd_t pmd_mkhuge(pmd_t pmd)
 {
+	if (radix_enabled())
+		return radix__pmd_mkhuge(pmd);
 	return hash__pmd_mkhuge(pmd);
 }
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 5f290d3..1b09ba0 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -827,6 +827,8 @@ extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
 extern int hash__has_transparent_hugepage(void);
 static inline int has_transparent_hugepage(void)
 {
+	if (radix_enabled())
+		return radix__has_transparent_hugepage();
 	return hash__has_transparent_hugepage();
 }
 
@@ -834,6 +836,8 @@ static inline unsigned long
 pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
 		    pmd_t *pmdp, unsigned long clr, unsigned long set)
 {
+	if (radix_enabled())
+		return radix__pmd_hugepage_update(mm, addr, pmdp, clr, set);
 	return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
 }
 
@@ -885,12 +889,16 @@ extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 					    unsigned long addr, pmd_t *pmdp)
 {
+	if (radix_enabled())
+		return radix__pmdp_huge_get_and_clear(mm, addr, pmdp);
 	return hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
 }
 
 static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
 					unsigned long address, pmd_t *pmdp)
 {
+	if (radix_enabled())
+		return radix__pmdp_collapse_flush(vma, address, pmdp);
 	return hash__pmdp_collapse_flush(vma, address, pmdp);
 }
 #define pmdp_collapse_flush pmdp_collapse_flush
 
@@ -899,6 +907,8 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
 static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
 					      pmd_t *pmdp, pgtable_t pgtable)
 {
+	if (radix_enabled())
+		return radix__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
 	return hash__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
 }
 
@@ -906,6 +916,8 @@ static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
 static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
 						    pmd_t *pmdp)
 {
+	if (radix_enabled())
+		return radix__pgtable_trans_huge_withdraw(mm, pmdp);
 	return hash__pgtable_trans_huge_withdraw(mm, pmdp);
 }
 
@@ -917,6 +929,8 @@ extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
 					   unsigned long address, pmd_t *pmdp)
 {
+	if (radix_enabled())
+		return radix__pmdp_huge_split_prepare(vma, address, pmdp);
 	return hash__pmdp_huge_split_prepare(vma, address, pmdp);
 }
 
@@ -925,6 +939,8 @@ struct spinlock;
 static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
 					 struct spinlock *old_pmd_ptl)
 {
+	if (radix_enabled())
+		return false;
 	/*
 	 * Archs like ppc64 use pgtable to store per pmd
 	 * specific information. So when we switch the pmd,
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index f470902..937d4e2 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -196,6 +196,28 @@ static inline int radix__pmd_trans_huge(pmd_t pmd)
 	return !!(pmd_val(pmd) & _PAGE_PTE);
 }
 
+static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) | _PAGE_PTE);
+}
+static inline void radix__pmdp_huge_split_prepare(struct vm_area_struct *vma,
+					unsigned long address, pmd_t *pmdp)
+{
+	/* Nothing to do for radix. */
+	return;
+}
+
+extern unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+					pmd_t *pmdp, unsigned long clr,
+					unsigned long set);
+extern pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma,
+					unsigned long address, pmd_t *pmdp);
+extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+					pgtable_t pgtable);
+extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
+					unsigned long addr, pmd_t *pmdp);
+extern int radix__has_transparent_hugepage(void);
 #endif
 
 extern int __meminit radix__vmemmap_create_mapping(unsigned long start,
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index d566a25..eb44511 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -69,7 +69,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pmd_t *pmdp)
 {
 	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
-
+	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 	/*
 	 * This ensures that generic code that rely on IRQ disabling
 	 * to prevent a parallel THP split work as expected.
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index a5a5253..18b2c11 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -19,6 +19,8 @@
 #include <asm/mmu.h>
 #include <asm/firmware.h>
 
+#include <trace/events/thp.h>
+
 static int native_update_partition_table(u64 patb1)
 {
 	partition_tb->patb1 = cpu_to_be64(patb1);
@@ -407,3 +409,118 @@ void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
 }
 #endif
 #endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+					 pmd_t *pmdp, unsigned long clr,
+					 unsigned long set)
+{
+	unsigned long old;
+
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(!radix__pmd_trans_huge(*pmdp));
+	assert_spin_locked(&mm->page_table_lock);
+#endif
+
+	old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
+	trace_hugepage_update(addr, old, clr, set);
+
+	return old;
+}
+
+pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
+			pmd_t *pmdp)
+
+{
+	pmd_t pmd;
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
+	/*
+	 * khugepaged calls this for normal pmd
+	 */
+	pmd = *pmdp;
+	pmd_clear(pmdp);
+	/*FIXME!! Verify whether we need this kick below */
+	kick_all_cpus_sync();
+	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+	return pmd;
+}
+
+/*
+ * For us pgtable_t is pte_t *. Inorder to save the deposisted
+ * page table, we consider the allocated page table as a list
+ * head. On withdraw we need to make sure we zero out the used
+ * list_head memory area.
+ */
+void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				 pgtable_t pgtable)
+{
+	struct list_head *lh = (struct list_head *) pgtable;
+
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+	/* FIFO */
+	if (!pmd_huge_pte(mm, pmdp))
+		INIT_LIST_HEAD(lh);
+	else
+		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+	pmd_huge_pte(mm, pmdp) = pgtable;
+}
+
+pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+	pte_t *ptep;
+	pgtable_t pgtable;
+	struct list_head *lh;
+
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+	/* FIFO */
+	pgtable = pmd_huge_pte(mm, pmdp);
+	lh = (struct list_head *) pgtable;
+	if (list_empty(lh))
+		pmd_huge_pte(mm, pmdp) = NULL;
+	else {
+		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+		list_del(lh);
+	}
+	ptep = (pte_t *) pgtable;
+	*ptep = __pte(0);
+	ptep++;
+	*ptep = __pte(0);
+	return pgtable;
+}
+
+
+pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
+				     unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t old_pmd;
+	unsigned long old;
+
+	old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
+	old_pmd = __pmd(old);
+	/*
+	 * Serialize against find_linux_pte_or_hugepte which does lock-less
+	 * lookup in page tables with local interrupts disabled. For huge pages
+	 * it casts pmd_t to pte_t. Since format of pte_t is different from
+	 * pmd_t we want to prevent transit from pmd pointing to page table
+	 * to pmd pointing to huge page (and back) while interrupts are disabled.
+	 * We clear pmd to possibly replace it with page table pointer in
+	 * different code paths. So make sure we wait for the parallel
+	 * find_linux_pte_or_hugepage to finish.
+	 */
+	kick_all_cpus_sync();
+	return old_pmd;
+}
+
+int radix__has_transparent_hugepage(void)
+{
+	/* For radix 2M at PMD level means thp */
+	if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT)
+		return 1;
+	return 0;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
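
[Editor's note] Several of the callbacks above reduce to radix__pmd_hugepage_update() with different clr/set masks: pmdp_invalidate() clears only _PAGE_PRESENT, while radix__pmdp_huge_get_and_clear() clears the whole entry with ~0UL. The fragment below is a non-atomic userspace sketch of that clear-then-set contract, not the real radix__pte_update() (which performs the update atomically); the bit positions and all TOY_/toy_ names are assumptions for illustration only.

#include <stdio.h>

typedef unsigned long long pmd_val_t;

#define TOY_PAGE_PRESENT (1ULL << 63)	/* assumed bit position */
#define TOY_PAGE_PTE	 (1ULL << 62)	/* assumed bit position */

/* Clear the bits in 'clr', set the bits in 'set', return the old value.
 * The kernel does this atomically via radix__pte_update(); plain C here. */
static pmd_val_t toy_hugepage_update(pmd_val_t *pmdp, pmd_val_t clr,
				     pmd_val_t set)
{
	pmd_val_t old = *pmdp;

	*pmdp = (old & ~clr) | set;
	return old;
}

int main(void)
{
	pmd_val_t pmd = TOY_PAGE_PRESENT | TOY_PAGE_PTE | 0x1000;
	pmd_val_t old;

	/* pmdp_invalidate(): drop only the present bit, keep the rest. */
	toy_hugepage_update(&pmd, TOY_PAGE_PRESENT, 0);
	printf("after invalidate: %#llx\n", pmd);

	/* pmdp_huge_get_and_clear(): clear the whole entry (clr = ~0UL). */
	old = toy_hugepage_update(&pmd, ~0ULL, 0);
	printf("old %#llx, now %#llx\n", old, pmd);
	return 0;
}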