summary refs log tree commit diff stats
path: root/arch/powerpc/mm/hugetlbpage.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r-- arch/powerpc/mm/hugetlbpage.c 192
1 files changed, 152 insertions, 40 deletions
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1a6de0a..237c8e5 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -48,30 +48,71 @@ static u64 gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;
#endif
-static inline int shift_to_mmu_psize(unsigned int shift)
+#define hugepd_none(hpd) ((hpd).pd == 0)
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * At this point we do the placement change only for BOOK3S 64. This would
+ * possibly work on other subarchs.
+ */
+
+/*
+ * We have PGD_INDEX_SIZE = 12 and PTE_INDEX_SIZE = 8, so that we can have
+ * a 16GB hugepage pte in the PGD and a 16MB hugepage pte in the PMD.
+ */
+int pmd_huge(pmd_t pmd)
{
- int psize;
+ /*
+ * leaf pte for huge page, bottom two bits != 00
+ */
+ return ((pmd_val(pmd) & 0x3) != 0x0);
+}
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
- if (mmu_psize_defs[psize].shift == shift)
- return psize;
- return -1;
+int pud_huge(pud_t pud)
+{
+ /*
+ * leaf pte for huge page, bottom two bits != 00
+ */
+ return ((pud_val(pud) & 0x3) != 0x0);
}
-static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
+int pgd_huge(pgd_t pgd)
{
- if (mmu_psize_defs[mmu_psize].shift)
- return mmu_psize_defs[mmu_psize].shift;
- BUG();
+ /*
+ * leaf pte for huge page, bottom two bits != 00
+ */
+ return ((pgd_val(pgd) & 0x3) != 0x0);
+}
+#else
+int pmd_huge(pmd_t pmd)
+{
+ return 0;
}
-#define hugepd_none(hpd) ((hpd).pd == 0)
+int pud_huge(pud_t pud)
+{
+ return 0;
+}
+
+int pgd_huge(pgd_t pgd)
+{
+ return 0;
+}
+#endif
+/*
+ * We have 4 cases for pgds and pmds:
+ * (1) invalid (all zeroes)
+ * (2) pointer to next table, as normal; bottom 6 bits == 0
+ * (3) leaf pte for huge page, bottom two bits != 00
+ * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table
+ */
pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
{
pgd_t *pg;
pud_t *pu;
pmd_t *pm;
+ pte_t *ret_pte;
hugepd_t *hpdp = NULL;
unsigned pdshift = PGDIR_SHIFT;
@@ -79,30 +120,43 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
*shift = 0;
pg = pgdir + pgd_index(ea);
- if (is_hugepd(pg)) {
+
+ if (pgd_huge(*pg)) {
+ ret_pte = (pte_t *) pg;
+ goto out;
+ } else if (is_hugepd(pg))
hpdp = (hugepd_t *)pg;
- } else if (!pgd_none(*pg)) {
+ else if (!pgd_none(*pg)) {
pdshift = PUD_SHIFT;
pu = pud_offset(pg, ea);
- if (is_hugepd(pu))
+
+ if (pud_huge(*pu)) {
+ ret_pte = (pte_t *) pu;
+ goto out;
+ } else if (is_hugepd(pu))
hpdp = (hugepd_t *)pu;
else if (!pud_none(*pu)) {
pdshift = PMD_SHIFT;
pm = pmd_offset(pu, ea);
- if (is_hugepd(pm))
+
+ if (pmd_huge(*pm)) {
+ ret_pte = (pte_t *) pm;
+ goto out;
+ } else if (is_hugepd(pm))
hpdp = (hugepd_t *)pm;
- else if (!pmd_none(*pm)) {
+ else if (!pmd_none(*pm))
return pte_offset_kernel(pm, ea);
- }
}
}
-
if (!hpdp)
return NULL;
+ ret_pte = hugepte_offset(hpdp, ea, pdshift);
+ pdshift = hugepd_shift(*hpdp);
+out:
if (shift)
- *shift = hugepd_shift(*hpdp);
- return hugepte_offset(hpdp, ea, pdshift);
+ *shift = pdshift;
+ return ret_pte;
}
EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
@@ -145,6 +199,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
if (unlikely(!hugepd_none(*hpdp)))
break;
else
+ /* We use the old format for PPC_FSL_BOOK3E */
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
}
/* If we bailed from the for loop early, an error occurred, clean up */
@@ -156,9 +211,15 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
#else
if (!hugepd_none(*hpdp))
kmem_cache_free(cachep, new);
- else
+ else {
+#ifdef CONFIG_PPC_BOOK3S_64
+ hpdp->pd = (unsigned long)new |
+ (shift_to_mmu_psize(pshift) << 2);
+#else
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#endif
+ }
+#endif
spin_unlock(&mm->page_table_lock);
return 0;
}
@@ -175,6 +236,61 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
#define HUGEPD_PUD_SHIFT PMD_SHIFT
#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * At this point we do the placement change only for BOOK3S 64. This would
+ * possibly work on other subarchs.
+ */
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
+{
+ pgd_t *pg;
+ pud_t *pu;
+ pmd_t *pm;
+ hugepd_t *hpdp = NULL;
+ unsigned pshift = __ffs(sz);
+ unsigned pdshift = PGDIR_SHIFT;
+
+ addr &= ~(sz-1);
+ pg = pgd_offset(mm, addr);
+
+ if (pshift == PGDIR_SHIFT)
+ /* 16GB huge page */
+ return (pte_t *) pg;
+ else if (pshift > PUD_SHIFT)
+ /*
+ * We need to use hugepd table
+ */
+ hpdp = (hugepd_t *)pg;
+ else {
+ pdshift = PUD_SHIFT;
+ pu = pud_alloc(mm, pg, addr);
+ if (pshift == PUD_SHIFT)
+ return (pte_t *)pu;
+ else if (pshift > PMD_SHIFT)
+ hpdp = (hugepd_t *)pu;
+ else {
+ pdshift = PMD_SHIFT;
+ pm = pmd_alloc(mm, pu, addr);
+ if (pshift == PMD_SHIFT)
+ /* 16MB hugepage */
+ return (pte_t *)pm;
+ else
+ hpdp = (hugepd_t *)pm;
+ }
+ }
+ if (!hpdp)
+ return NULL;
+
+ BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
+
+ if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
+ return NULL;
+
+ return hugepte_offset(hpdp, addr, pdshift);
+}
+
+#else
+
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
pgd_t *pg;
@@ -212,6 +328,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
return hugepte_offset(hpdp, addr, pdshift);
}
+#endif
#ifdef CONFIG_PPC_FSL_BOOK3E
/* Build list of addresses of gigantic pages. This function is used in early
@@ -475,7 +592,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
do {
pmd = pmd_offset(pud, addr);
next = pmd_addr_end(addr, end);
- if (pmd_none(*pmd))
+ if (pmd_none_or_clear_bad(pmd))
continue;
#ifdef CONFIG_PPC_FSL_BOOK3E
/*
@@ -628,16 +745,6 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
return page;
}
-int pmd_huge(pmd_t pmd)
-{
- return 0;
-}
-
-int pud_huge(pud_t pud)
-{
- return 0;
-}
-
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmd, int write)
@@ -646,8 +753,8 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
return NULL;
}
-static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
+int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long mask;
unsigned long pte_end;
@@ -742,7 +849,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct hstate *hstate = hstate_file(file);
int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
- return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
+ return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
}
#endif
@@ -883,11 +990,16 @@ static int __init hugetlbpage_init(void)
pdshift = PUD_SHIFT;
else
pdshift = PGDIR_SHIFT;
-
- pgtable_cache_add(pdshift - shift, NULL);
- if (!PGT_CACHE(pdshift - shift))
- panic("hugetlbpage_init(): could not create "
- "pgtable cache for %d bit pagesize\n", shift);
+ /*
+ * If pdshift and shift have the same value, we don't
+ * use the pgt cache for hugepd.
+ */
+ if (pdshift != shift) {
+ pgtable_cache_add(pdshift - shift, NULL);
+ if (!PGT_CACHE(pdshift - shift))
+ panic("hugetlbpage_init(): could not create "
+ "pgtable cache for %d bit pagesize\n", shift);
+ }
}
/* Set default large page size. Currently, we pick 16M or 1M
OpenPOWER on IntegriCloud