summary | refs | log | tree | commit | diff | stats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- arch/powerpc/mm/8xx_mmu.c 2
-rw-r--r-- arch/powerpc/mm/dma-noncoherent.c 2
-rw-r--r-- arch/powerpc/mm/dump_hashpagetable.c 2
-rw-r--r-- arch/powerpc/mm/fault.c 17
-rw-r--r-- arch/powerpc/mm/hash_native_64.c 41
-rw-r--r-- arch/powerpc/mm/hash_utils_64.c 2
-rw-r--r-- arch/powerpc/mm/hugetlbpage.c 95
-rw-r--r-- arch/powerpc/mm/init_64.c 82
-rw-r--r-- arch/powerpc/mm/mem.c 33
-rw-r--r-- arch/powerpc/mm/mmap.c 28
-rw-r--r-- arch/powerpc/mm/mmu_context_book3s64.c 35
-rw-r--r-- arch/powerpc/mm/mmu_decl.h 1
-rw-r--r-- arch/powerpc/mm/numa.c 22
-rw-r--r-- arch/powerpc/mm/pgtable-book3s64.c 4
-rw-r--r-- arch/powerpc/mm/pgtable-hash64.c 133
-rw-r--r-- arch/powerpc/mm/pgtable-radix.c 108
-rw-r--r-- arch/powerpc/mm/pgtable_32.c 15
-rw-r--r-- arch/powerpc/mm/pgtable_64.c 53
-rw-r--r-- arch/powerpc/mm/slb.c 10
-rw-r--r-- arch/powerpc/mm/slb_low.S 30
-rw-r--r-- arch/powerpc/mm/tlb-radix.c 9
-rw-r--r-- arch/powerpc/mm/tlb_hash64.c 6
22 files changed, 549 insertions, 181 deletions
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index 6c5025e..f4c6472 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -88,7 +88,7 @@ static void mmu_mapin_immr(void)
int offset;
for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE)
- map_page(v + offset, p + offset, f);
+ map_kernel_page(v + offset, p + offset, f);
}
/* Address of instructions to patch */
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 2dc74e5..3825284 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -227,7 +227,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t
do {
SetPageReserved(page);
- map_page(vaddr, page_to_phys(page),
+ map_kernel_page(vaddr, page_to_phys(page),
pgprot_val(pgprot_noncached(PAGE_KERNEL)));
page++;
vaddr += PAGE_SIZE;
diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c
index c6b900f..b1c144b 100644
--- a/arch/powerpc/mm/dump_hashpagetable.c
+++ b/arch/powerpc/mm/dump_hashpagetable.c
@@ -335,7 +335,7 @@ static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize)
unsigned long rpn, lp_bits;
int base_psize = 0, actual_psize = 0;
- if (ea <= PAGE_OFFSET)
+ if (ea < PAGE_OFFSET)
return -1;
/* Look in primary table */
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 3a7d580..4c42263 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -206,6 +206,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
int is_write = 0;
int trap = TRAP(regs);
int is_exec = trap == 0x400;
+ int is_user = user_mode(regs);
int fault;
int rc = 0, store_update_sp = 0;
@@ -216,7 +217,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* bits we are interested in. But there are some bits which
* indicate errors in DSISR but can validly be set in SRR1.
*/
- if (trap == 0x400)
+ if (is_exec)
error_code &= 0x48200000;
else
is_write = error_code & DSISR_ISSTORE;
@@ -247,13 +248,13 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* The kernel should never take an execute fault nor should it
* take a page fault to a kernel address.
*/
- if (!user_mode(regs) && (is_exec || (address >= TASK_SIZE))) {
+ if (!is_user && (is_exec || (address >= TASK_SIZE))) {
rc = SIGSEGV;
goto bail;
}
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \
- defined(CONFIG_PPC_BOOK3S_64))
+ defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_8xx))
if (error_code & DSISR_DABRMATCH) {
/* breakpoint match */
do_break(regs, address, error_code);
@@ -266,7 +267,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
local_irq_enable();
if (faulthandler_disabled() || mm == NULL) {
- if (!user_mode(regs)) {
+ if (!is_user) {
rc = SIGSEGV;
goto bail;
}
@@ -287,10 +288,10 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* can result in fault, which will cause a deadlock when called with
* mmap_sem held
*/
- if (!is_exec && user_mode(regs))
+ if (is_write && is_user)
store_update_sp = store_updates_sp(regs);
- if (user_mode(regs))
+ if (is_user)
flags |= FAULT_FLAG_USER;
/* When running in the kernel we expect faults to occur only to
@@ -309,7 +310,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* thus avoiding the deadlock.
*/
if (!down_read_trylock(&mm->mmap_sem)) {
- if (!user_mode(regs) && !search_exception_tables(regs->nip))
+ if (!is_user && !search_exception_tables(regs->nip))
goto bad_area_nosemaphore;
retry:
@@ -509,7 +510,7 @@ bad_area:
bad_area_nosemaphore:
/* User mode accesses cause a SIGSEGV */
- if (user_mode(regs)) {
+ if (is_user) {
_exception(SIGSEGV, regs, code, address);
goto bail;
}
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 65bb8f3..3848af1 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -15,6 +15,7 @@
#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
+#include <linux/processor.h>
#include <linux/threads.h>
#include <linux/smp.h>
@@ -23,6 +24,7 @@
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+#include <asm/trace.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
@@ -98,6 +100,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
: "memory");
break;
}
+ trace_tlbie(0, 0, va, 0, 0, 0, 0);
}
static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
@@ -147,6 +150,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
: "memory");
break;
}
+ trace_tlbie(0, 1, va, 0, 0, 0, 0);
}
@@ -181,8 +185,10 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
while (1) {
if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
break;
+ spin_begin();
while(test_bit(HPTE_LOCK_BIT, word))
- cpu_relax();
+ spin_cpu_relax();
+ spin_end();
}
}
@@ -407,6 +413,38 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
tlbie(vpn, psize, psize, ssize, 0);
}
+/*
+ * Remove a bolted kernel entry. Memory hotplug uses this.
+ *
+ * No need to lock here because we should be the only user.
+ */
+static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
+{
+ unsigned long vpn;
+ unsigned long vsid;
+ long slot;
+ struct hash_pte *hptep;
+
+ vsid = get_kernel_vsid(ea, ssize);
+ vpn = hpt_vpn(ea, vsid, ssize);
+
+ slot = native_hpte_find(vpn, psize, ssize);
+ if (slot == -1)
+ return -ENOENT;
+
+ hptep = htab_address + slot;
+
+ VM_WARN_ON(!(be64_to_cpu(hptep->v) & HPTE_V_BOLTED));
+
+ /* Invalidate the hpte */
+ hptep->v = 0;
+
+ /* Invalidate the TLB */
+ tlbie(vpn, psize, psize, ssize, 0);
+ return 0;
+}
+
+
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
int bpsize, int apsize, int ssize, int local)
{
@@ -725,6 +763,7 @@ void __init hpte_init_native(void)
mmu_hash_ops.hpte_invalidate = native_hpte_invalidate;
mmu_hash_ops.hpte_updatepp = native_hpte_updatepp;
mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp;
+ mmu_hash_ops.hpte_removebolted = native_hpte_removebolted;
mmu_hash_ops.hpte_insert = native_hpte_insert;
mmu_hash_ops.hpte_remove = native_hpte_remove;
mmu_hash_ops.hpte_clear_all = native_hpte_clear;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index f2095ce..7a20669 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -810,6 +810,8 @@ static void update_hid_for_hash(void)
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
+ trace_tlbie(0, 0, rb, 0, 2, 0, 0);
+
/*
* now switch the HID
*/
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a4f33de..e1bf5ca 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -17,6 +17,8 @@
#include <linux/memblock.h>
#include <linux/bootmem.h>
#include <linux/moduleparam.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
@@ -32,6 +34,7 @@
#define PAGE_SHIFT_16G 34
unsigned int HPAGE_SHIFT;
+EXPORT_SYMBOL(HPAGE_SHIFT);
/*
* Tracks gpages after the device tree is scanned and before the
@@ -55,7 +58,7 @@ static unsigned nr_gpages;
#define hugepd_none(hpd) (hpd_val(hpd) == 0)
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
/* Only called for hugetlbfs pages, hence can ignore THP */
return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
@@ -77,7 +80,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
num_hugepd = 1;
}
- new = kmem_cache_zalloc(cachep, GFP_KERNEL);
+ new = kmem_cache_zalloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
BUG_ON(pshift > HUGEPD_SHIFT_MASK);
BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -617,62 +620,39 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
} while (addr = next, addr != end);
}
-/*
- * We are holding mmap_sem, so a parallel huge page collapse cannot run.
- * To prevent hugepage split, disable irq.
- */
-struct page *
-follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
+struct page *follow_huge_pd(struct vm_area_struct *vma,
+ unsigned long address, hugepd_t hpd,
+ int flags, int pdshift)
{
- bool is_thp;
- pte_t *ptep, pte;
- unsigned shift;
- unsigned long mask, flags;
- struct page *page = ERR_PTR(-EINVAL);
-
- local_irq_save(flags);
- ptep = find_linux_pte_or_hugepte(mm->pgd, address, &is_thp, &shift);
- if (!ptep)
- goto no_page;
- pte = READ_ONCE(*ptep);
- /*
- * Verify it is a huge page else bail.
- * Transparent hugepages are handled by generic code. We can skip them
- * here.
- */
- if (!shift || is_thp)
- goto no_page;
-
- if (!pte_present(pte)) {
- page = NULL;
- goto no_page;
+ pte_t *ptep;
+ spinlock_t *ptl;
+ struct page *page = NULL;
+ unsigned long mask;
+ int shift = hugepd_shift(hpd);
+ struct mm_struct *mm = vma->vm_mm;
+
+retry:
+ ptl = &mm->page_table_lock;
+ spin_lock(ptl);
+
+ ptep = hugepte_offset(hpd, address, pdshift);
+ if (pte_present(*ptep)) {
+ mask = (1UL << shift) - 1;
+ page = pte_page(*ptep);
+ page += ((address & mask) >> PAGE_SHIFT);
+ if (flags & FOLL_GET)
+ get_page(page);
+ } else {
+ if (is_hugetlb_entry_migration(*ptep)) {
+ spin_unlock(ptl);
+ __migration_entry_wait(mm, ptep, ptl);
+ goto retry;
+ }
}
- mask = (1UL << shift) - 1;
- page = pte_page(pte);
- if (page)
- page += (address & mask) / PAGE_SIZE;
-
-no_page:
- local_irq_restore(flags);
+ spin_unlock(ptl);
return page;
}
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
- pmd_t *pmd, int write)
-{
- BUG();
- return NULL;
-}
-
-struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
- pud_t *pud, int write)
-{
- BUG();
- return NULL;
-}
-
static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
unsigned long sz)
{
@@ -763,8 +743,11 @@ static int __init add_huge_page_size(unsigned long long size)
* Hash: 16M and 16G
*/
if (radix_enabled()) {
- if (mmu_psize != MMU_PAGE_2M)
- return -EINVAL;
+ if (mmu_psize != MMU_PAGE_2M) {
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1) ||
+ (mmu_psize != MMU_PAGE_1G))
+ return -EINVAL;
+ }
} else {
if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
return -EINVAL;
@@ -963,7 +946,7 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
if (pmd_none(pmd))
return NULL;
- if (pmd_trans_huge(pmd)) {
+ if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
if (is_thp)
*is_thp = true;
ret_pte = (pte_t *) pmdp;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index ec84b31..5b4c25d 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -44,6 +44,7 @@
#include <linux/slab.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
+#include <linux/memremap.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
@@ -110,8 +111,29 @@ static int __meminit vmemmap_populated(unsigned long start, int page_size)
return 0;
}
+/*
+ * vmemmap virtual address space management does not have a traditional page
+ * table to track which virtual struct pages are backed by physical mapping.
+ * The virtual to physical mappings are tracked in a simple linked list
+ * format. 'vmemmap_list' maintains the entire vmemmap physical mapping at
+ * all times, whereas the 'next' list maintains the available
+ * vmemmap_backing structures which have been deleted from the
+ * 'vmemmap_list' list during system runtime (memory hotplug remove
+ * operation). The freed 'vmemmap_backing' structures are reused later when
+ * new requests come in without allocating fresh memory. This pointer also
+ * tracks the allocated 'vmemmap_backing' structures as we allocate one
+ * full page of memory at a time when we don't have any.
+ */
struct vmemmap_backing *vmemmap_list;
static struct vmemmap_backing *next;
+
+/*
+ * The same pointer 'next' tracks individual chunks inside the allocated
+ * full page during boot time and again tracks the freed nodes during
+ * runtime. This is racy in principle, but the race does not occur because
+ * the two uses are separated by the boot process. It will cause problems
+ * if we somehow have a memory hotplug operation during boot!
+ */
static int num_left;
static int num_freed;
@@ -171,13 +193,17 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
for (; start < end; start += page_size) {
+ struct vmem_altmap *altmap;
void *p;
int rc;
if (vmemmap_populated(start, page_size))
continue;
- p = vmemmap_alloc_block(page_size, node);
+ /* altmap lookups only work at section boundaries */
+ altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start));
+
+ p = __vmemmap_alloc_block_buf(page_size, node, altmap);
if (!p)
return -ENOMEM;
@@ -234,13 +260,17 @@ static unsigned long vmemmap_list_free(unsigned long start)
void __ref vmemmap_free(unsigned long start, unsigned long end)
{
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
+ unsigned long page_order = get_order(page_size);
start = _ALIGN_DOWN(start, page_size);
pr_debug("vmemmap_free %lx...%lx\n", start, end);
for (; start < end; start += page_size) {
- unsigned long addr;
+ unsigned long nr_pages, addr;
+ struct vmem_altmap *altmap;
+ struct page *section_base;
+ struct page *page;
/*
* the section has already be marked as invalid, so
@@ -251,29 +281,33 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
continue;
addr = vmemmap_list_free(start);
- if (addr) {
- struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
-
- if (PageReserved(page)) {
- /* allocated from bootmem */
- if (page_size < PAGE_SIZE) {
- /*
- * this shouldn't happen, but if it is
- * the case, leave the memory there
- */
- WARN_ON_ONCE(1);
- } else {
- unsigned int nr_pages =
- 1 << get_order(page_size);
- while (nr_pages--)
- free_reserved_page(page++);
- }
- } else
- free_pages((unsigned long)(__va(addr)),
- get_order(page_size));
-
- vmemmap_remove_mapping(start, page_size);
+ if (!addr)
+ continue;
+
+ page = pfn_to_page(addr >> PAGE_SHIFT);
+ section_base = pfn_to_page(vmemmap_section_start(start));
+ nr_pages = 1 << page_order;
+
+ altmap = to_vmem_altmap((unsigned long) section_base);
+ if (altmap) {
+ vmem_altmap_free(altmap, nr_pages);
+ } else if (PageReserved(page)) {
+ /* allocated from bootmem */
+ if (page_size < PAGE_SIZE) {
+ /*
+ * this shouldn't happen, but if it is
+ * the case, leave the memory there
+ */
+ WARN_ON_ONCE(1);
+ } else {
+ while (nr_pages--)
+ free_reserved_page(page++);
+ }
+ } else {
+ free_pages((unsigned long)(__va(addr)), page_order);
}
+
+ vmemmap_remove_mapping(start, page_size);
}
}
#endif
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 9ee536e..46b4e67 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -36,6 +36,7 @@
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/memremap.h>
#include <asm/pgalloc.h>
#include <asm/prom.h>
@@ -126,18 +127,14 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
{
- struct pglist_data *pgdata;
- struct zone *zone;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
int rc;
resize_hpt_for_hotplug(memblock_phys_mem_size());
- pgdata = NODE_DATA(nid);
-
start = (unsigned long)__va(start);
rc = create_section_mapping(start, start + size);
if (rc) {
@@ -147,11 +144,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
return -EFAULT;
}
- /* this should work for most non-highmem platforms */
- zone = pgdata->node_zones +
- zone_for_memory(nid, start, size, 0, for_device);
-
- return __add_pages(nid, zone, start_pfn, nr_pages);
+ return __add_pages(nid, start_pfn, nr_pages, want_memblock);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
@@ -159,11 +152,20 @@ int arch_remove_memory(u64 start, u64 size)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
+ struct vmem_altmap *altmap;
+ struct page *page;
int ret;
- zone = page_zone(pfn_to_page(start_pfn));
- ret = __remove_pages(zone, start_pfn, nr_pages);
+ /*
+ * If we have an altmap then we need to skip over any reserved PFNs
+ * when querying the zone.
+ */
+ page = pfn_to_page(start_pfn);
+ altmap = to_vmem_altmap((unsigned long) page);
+ if (altmap)
+ page += vmem_altmap_offset(altmap);
+
+ ret = __remove_pages(page_zone(page), start_pfn, nr_pages);
if (ret)
return ret;
@@ -313,11 +315,11 @@ void __init paging_init(void)
unsigned long end = __fix_to_virt(FIX_HOLE);
for (; v < end; v += PAGE_SIZE)
- map_page(v, 0, 0); /* XXX gross */
+ map_kernel_page(v, 0, 0); /* XXX gross */
#endif
#ifdef CONFIG_HIGHMEM
- map_page(PKMAP_BASE, 0, 0); /* XXX gross */
+ map_kernel_page(PKMAP_BASE, 0, 0); /* XXX gross */
pkmap_page_table = virt_to_kpte(PKMAP_BASE);
kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
@@ -400,6 +402,7 @@ void __init mem_init(void)
void free_initmem(void)
{
ppc_md.progress = ppc_printk_progress;
+ mark_initmem_nx();
free_initmem_default(POISON_FREE_INITMEM);
}
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 0ee6be4..5d78b19 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -34,16 +34,9 @@
/*
* Top of mmap area (just below the process stack).
*
- * Leave at least a ~128 MB hole on 32bit applications.
- *
- * On 64bit applications we randomise the stack by 1GB so we need to
- * space our mmap start address by a further 1GB, otherwise there is a
- * chance the mmap area will end up closer to the stack than our ulimit
- * requires.
+ * Leave at least a ~128 MB hole.
*/
-#define MIN_GAP32 (128*1024*1024)
-#define MIN_GAP64 ((128 + 1024)*1024*1024UL)
-#define MIN_GAP ((is_32bit_task()) ? MIN_GAP32 : MIN_GAP64)
+#define MIN_GAP (128*1024*1024)
#define MAX_GAP (TASK_SIZE/6*5)
static inline int mmap_is_legacy(void)
@@ -71,9 +64,26 @@ unsigned long arch_mmap_rnd(void)
return rnd << PAGE_SHIFT;
}
+static inline unsigned long stack_maxrandom_size(void)
+{
+ if (!(current->flags & PF_RANDOMIZE))
+ return 0;
+
+ /* 8MB for 32bit, 1GB for 64bit */
+ if (is_32bit_task())
+ return (1<<23);
+ else
+ return (1<<30);
+}
+
static inline unsigned long mmap_base(unsigned long rnd)
{
unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
+
+ /* Values close to RLIM_INFINITY can overflow. */
+ if (gap + pad > gap)
+ gap += pad;
if (gap < MIN_GAP)
gap = MIN_GAP;
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index a3edf81..abed1fe 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -138,6 +138,14 @@ static int radix__init_new_context(struct mm_struct *mm)
rts_field = radix__get_tree_size();
process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
+ /*
+ * Order the above store with subsequent update of the PID
+ * register (at which point HW can start loading/caching
+ * the entry) and the corresponding load by the MMU from
+ * the L2 cache.
+ */
+ asm volatile("ptesync;isync" : : : "memory");
+
mm->context.npu_context = NULL;
return index;
@@ -223,9 +231,15 @@ void destroy_context(struct mm_struct *mm)
mm->context.cop_lockp = NULL;
#endif /* CONFIG_PPC_ICSWX */
- if (radix_enabled())
- process_tb[mm->context.id].prtb1 = 0;
- else
+ if (radix_enabled()) {
+ /*
+ * Radix doesn't have a valid bit in the process table
+ * entries. However we know that at least P9 implementation
+ * will avoid caching an entry with an invalid RTS field,
+ * and 0 is invalid. So this will do.
+ */
+ process_tb[mm->context.id].prtb0 = 0;
+ } else
subpage_prot_free(mm);
destroy_pagetable_page(mm);
__destroy_context(mm->context.id);
@@ -235,10 +249,15 @@ void destroy_context(struct mm_struct *mm)
#ifdef CONFIG_PPC_RADIX_MMU
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
- asm volatile("isync": : :"memory");
- mtspr(SPRN_PID, next->context.id);
- asm volatile("isync \n"
- PPC_SLBIA(0x7)
- : : :"memory");
+
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+ isync();
+ mtspr(SPRN_PID, next->context.id);
+ isync();
+ asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
+ } else {
+ mtspr(SPRN_PID, next->context.id);
+ isync();
+ }
}
#endif
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index f988db6..d46128b 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -94,7 +94,6 @@ extern void _tlbia(void);
#ifdef CONFIG_PPC32
extern void mapin_ram(void);
-extern int map_page(unsigned long va, phys_addr_t pa, int flags);
extern void setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, pgprot_t prot);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 371792e..b95c584 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1311,8 +1311,10 @@ static int update_lookup_table(void *data)
/*
* Update the node maps and sysfs entries for each cpu whose home node
* has changed. Returns 1 when the topology has changed, and 0 otherwise.
+ *
+ * cpus_locked says whether we already hold cpu_hotplug_lock.
*/
-int arch_update_cpu_topology(void)
+int numa_update_cpu_topology(bool cpus_locked)
{
unsigned int cpu, sibling, changed = 0;
struct topology_update_data *updates, *ud;
@@ -1400,15 +1402,23 @@ int arch_update_cpu_topology(void)
if (!cpumask_weight(&updated_cpus))
goto out;
- stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
+ if (cpus_locked)
+ stop_machine_cpuslocked(update_cpu_topology, &updates[0],
+ &updated_cpus);
+ else
+ stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
/*
* Update the numa-cpu lookup table with the new mappings, even for
* offline CPUs. It is best to perform this update from the stop-
* machine context.
*/
- stop_machine(update_lookup_table, &updates[0],
+ if (cpus_locked)
+ stop_machine_cpuslocked(update_lookup_table, &updates[0],
cpumask_of(raw_smp_processor_id()));
+ else
+ stop_machine(update_lookup_table, &updates[0],
+ cpumask_of(raw_smp_processor_id()));
for (ud = &updates[0]; ud; ud = ud->next) {
unregister_cpu_under_node(ud->cpu, ud->old_nid);
@@ -1426,6 +1436,12 @@ out:
return changed;
}
+int arch_update_cpu_topology(void)
+{
+ lockdep_assert_cpus_held();
+ return numa_update_cpu_topology(true);
+}
+
static void topology_work_fn(struct work_struct *work)
{
rebuild_sched_domains();
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 5fcb3dd..31eed8f 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -32,7 +32,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
{
int changed;
#ifdef CONFIG_DEBUG_VM
- WARN_ON(!pmd_trans_huge(*pmdp));
+ WARN_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
assert_spin_locked(&vma->vm_mm->page_table_lock);
#endif
changed = !pmd_same(*(pmdp), entry);
@@ -59,7 +59,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
#ifdef CONFIG_DEBUG_VM
WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
assert_spin_locked(&mm->page_table_lock);
- WARN_ON(!pmd_trans_huge(pmd));
+ WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd)));
#endif
trace_hugepage_set_pmd(addr, pmd_val(pmd));
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 8b85a14b..443a2c6 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -11,8 +11,12 @@
#include <linux/sched.h>
#include <linux/mm_types.h>
+#include <linux/mm.h>
#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+#include <asm/mmu.h>
#include <asm/tlb.h>
#include "mmu_decl.h"
@@ -22,6 +26,81 @@
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
+ * vmemmap is the starting address of the virtual address space where
+ * struct pages are allocated for all possible PFNs present on the system
+ * including holes and bad memory (hence sparse). These virtual struct
+ * pages are stored in sequence in this virtual address space irrespective
+ * of whether the corresponding PFN is valid or not. This achieves a
+ * constant relationship between the address of a struct page and its PFN.
+ *
+ * During boot or memory hotplug operation when a new memory section is
+ * added, physical memory allocation (including hash table bolting) will
+ * be performed for the set of struct pages which are part of the memory
+ * section. This saves memory by not allocating struct pages for PFNs
+ * which are not valid.
+ *
+ * ----------------------------------------------
+ * | PHYSICAL ALLOCATION OF VIRTUAL STRUCT PAGES|
+ * ----------------------------------------------
+ *
+ * f000000000000000 c000000000000000
+ * vmemmap +--------------+ +--------------+
+ * + | page struct | +--------------> | page struct |
+ * | +--------------+ +--------------+
+ * | | page struct | +--------------> | page struct |
+ * | +--------------+ | +--------------+
+ * | | page struct | + +------> | page struct |
+ * | +--------------+ | +--------------+
+ * | | page struct | | +--> | page struct |
+ * | +--------------+ | | +--------------+
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | +-------+ |
+ * | +--------------+ |
+ * | | page struct | +-----------+
+ * | +--------------+
+ * | | page struct | No mapping
+ * | +--------------+
+ * | | page struct | No mapping
+ * v +--------------+
+ *
+ * -----------------------------------------
+ * | RELATION BETWEEN STRUCT PAGES AND PFNS|
+ * -----------------------------------------
+ *
+ * vmemmap +--------------+ +---------------+
+ * + | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | |
+ * | +--------------+
+ * | | |
+ * | +--------------+
+ * | | |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | |
+ * | +--------------+
+ * | | |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * v +--------------+ +---------------+
+ */
+/*
* On hash-based CPUs, the vmemmap is bolted in the hash table.
*
*/
@@ -109,7 +188,7 @@ unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr
unsigned long old;
#ifdef CONFIG_DEBUG_VM
- WARN_ON(!pmd_trans_huge(*pmdp));
+ WARN_ON(!hash__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
assert_spin_locked(&mm->page_table_lock);
#endif
@@ -141,6 +220,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(pmd_trans_huge(*pmdp));
+ VM_BUG_ON(pmd_devmap(*pmdp));
pmd = *pmdp;
pmd_clear(pmdp);
@@ -221,6 +301,7 @@ void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma,
{
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
+ VM_BUG_ON(pmd_devmap(*pmdp));
/*
* We can't mark the pmd none here, because that will cause a race
@@ -342,3 +423,53 @@ int hash__has_transparent_hugepage(void)
return 1;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+static bool hash__change_memory_range(unsigned long start, unsigned long end,
+ unsigned long newpp)
+{
+ unsigned long idx;
+ unsigned int step, shift;
+
+ shift = mmu_psize_defs[mmu_linear_psize].shift;
+ step = 1 << shift;
+
+ start = ALIGN_DOWN(start, step);
+ end = ALIGN(end, step); // aligns up
+
+ if (start >= end)
+ return false;
+
+ pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n",
+ start, end, newpp, step);
+
+ for (idx = start; idx < end; idx += step)
+ /* Not sure if we can do much with the return value */
+ mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
+ mmu_kernel_ssize);
+
+ return true;
+}
+
+void hash__mark_rodata_ro(void)
+{
+ unsigned long start, end;
+
+ start = (unsigned long)_stext;
+ end = (unsigned long)__init_begin;
+
+ WARN_ON(!hash__change_memory_range(start, end, PP_RXXX));
+}
+
+void hash__mark_initmem_nx(void)
+{
+ unsigned long start, end, pp;
+
+ start = (unsigned long)__init_begin;
+ end = (unsigned long)__init_end;
+
+ pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL));
+
+ WARN_ON(!hash__change_memory_range(start, end, pp));
+}
+#endif
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index c28165d..5cc50d4 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -11,6 +11,7 @@
#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
+#include <linux/mm.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -19,6 +20,8 @@
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/powernv.h>
+#include <asm/sections.h>
+#include <asm/trace.h>
#include <trace/events/thp.h>
@@ -108,6 +111,67 @@ set_the_pte:
return 0;
}
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void radix__change_memory_range(unsigned long start, unsigned long end,
+ unsigned long clear)
+{
+ unsigned long idx;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ start = ALIGN_DOWN(start, PAGE_SIZE);
+ end = PAGE_ALIGN(end); // aligns up
+
+ pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
+ start, end, clear);
+
+ for (idx = start; idx < end; idx += PAGE_SIZE) {
+ pgdp = pgd_offset_k(idx);
+ pudp = pud_alloc(&init_mm, pgdp, idx);
+ if (!pudp)
+ continue;
+ if (pud_huge(*pudp)) {
+ ptep = (pte_t *)pudp;
+ goto update_the_pte;
+ }
+ pmdp = pmd_alloc(&init_mm, pudp, idx);
+ if (!pmdp)
+ continue;
+ if (pmd_huge(*pmdp)) {
+ ptep = pmdp_ptep(pmdp);
+ goto update_the_pte;
+ }
+ ptep = pte_alloc_kernel(pmdp, idx);
+ if (!ptep)
+ continue;
+update_the_pte:
+ radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
+ }
+
+ radix__flush_tlb_kernel_range(start, end);
+}
+
+void radix__mark_rodata_ro(void)
+{
+ unsigned long start, end;
+
+ start = (unsigned long)_stext;
+ end = (unsigned long)__init_begin;
+
+ radix__change_memory_range(start, end, _PAGE_WRITE);
+}
+
+void radix__mark_initmem_nx(void)
+{
+ unsigned long start = (unsigned long)__init_begin;
+ unsigned long end = (unsigned long)__init_end;
+
+ radix__change_memory_range(start, end, _PAGE_EXEC);
+}
+#endif /* CONFIG_STRICT_KERNEL_RWX */
+
static inline void __meminit print_mapping(unsigned long start,
unsigned long end,
unsigned long size)
@@ -121,7 +185,14 @@ static inline void __meminit print_mapping(unsigned long start,
static int __meminit create_physical_mapping(unsigned long start,
unsigned long end)
{
- unsigned long addr, mapping_size = 0;
+ unsigned long vaddr, addr, mapping_size = 0;
+ pgprot_t prot;
+ unsigned long max_mapping_size;
+#ifdef CONFIG_STRICT_KERNEL_RWX
+ int split_text_mapping = 1;
+#else
+ int split_text_mapping = 0;
+#endif
start = _ALIGN_UP(start, PAGE_SIZE);
for (addr = start; addr < end; addr += mapping_size) {
@@ -130,9 +201,12 @@ static int __meminit create_physical_mapping(unsigned long start,
gap = end - addr;
previous_size = mapping_size;
+ max_mapping_size = PUD_SIZE;
+retry:
if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
- mmu_psize_defs[MMU_PAGE_1G].shift)
+ mmu_psize_defs[MMU_PAGE_1G].shift &&
+ PUD_SIZE <= max_mapping_size)
mapping_size = PUD_SIZE;
else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
mmu_psize_defs[MMU_PAGE_2M].shift)
@@ -140,13 +214,32 @@ static int __meminit create_physical_mapping(unsigned long start,
else
mapping_size = PAGE_SIZE;
+ if (split_text_mapping && (mapping_size == PUD_SIZE) &&
+ (addr <= __pa_symbol(__init_begin)) &&
+ (addr + mapping_size) >= __pa_symbol(_stext)) {
+ max_mapping_size = PMD_SIZE;
+ goto retry;
+ }
+
+ if (split_text_mapping && (mapping_size == PMD_SIZE) &&
+ (addr <= __pa_symbol(__init_begin)) &&
+ (addr + mapping_size) >= __pa_symbol(_stext))
+ mapping_size = PAGE_SIZE;
+
if (mapping_size != previous_size) {
print_mapping(start, addr, previous_size);
start = addr;
}
- rc = radix__map_kernel_page((unsigned long)__va(addr), addr,
- PAGE_KERNEL_X, mapping_size);
+ vaddr = (unsigned long)__va(addr);
+
+ if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
+ overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size))
+ prot = PAGE_KERNEL_X;
+ else
+ prot = PAGE_KERNEL;
+
+ rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
if (rc)
return rc;
}
@@ -190,6 +283,7 @@ static void __init radix_init_pgtable(void)
asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (0));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
}
static void __init radix_init_partition_table(void)
@@ -316,6 +410,9 @@ static void update_hid_for_radix(void)
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
+ trace_tlbie(0, 0, rb, 0, 2, 0, 1);
+ trace_tlbie(0, 0, rb, 0, 2, 1, 1);
+
/*
* now switch the HID
*/
@@ -683,7 +780,7 @@ unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long add
unsigned long old;
#ifdef CONFIG_DEBUG_VM
- WARN_ON(!radix__pmd_trans_huge(*pmdp));
+ WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
assert_spin_locked(&mm->page_table_lock);
#endif
@@ -701,6 +798,7 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
+ VM_BUG_ON(pmd_devmap(*pmdp));
/*
* khugepaged calls this for normal pmd
*/
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index a65c0b4..a9e4bfc 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -60,7 +60,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *ptepage;
- gfp_t flags = GFP_KERNEL | __GFP_ZERO;
+ gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT;
ptepage = alloc_pages(flags, 0);
if (!ptepage)
@@ -189,7 +189,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
err = 0;
for (i = 0; i < size && err == 0; i += PAGE_SIZE)
- err = map_page(v+i, p+i, flags);
+ err = map_kernel_page(v+i, p+i, flags);
if (err) {
if (slab_is_available())
vunmap((void *)v);
@@ -215,7 +215,7 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
-int map_page(unsigned long va, phys_addr_t pa, int flags)
+int map_kernel_page(unsigned long va, phys_addr_t pa, int flags)
{
pmd_t *pd;
pte_t *pg;
@@ -255,7 +255,7 @@ void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
ktext = ((char *)v >= _stext && (char *)v < etext) ||
((char *)v >= _sinittext && (char *)v < _einittext);
f = ktext ? pgprot_val(PAGE_KERNEL_TEXT) : pgprot_val(PAGE_KERNEL);
- map_page(v, p, f);
+ map_kernel_page(v, p, f);
#ifdef CONFIG_PPC_STD_MMU_32
if (ktext)
hash_preload(&init_mm, v, 0, 0x300);
@@ -387,11 +387,6 @@ void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
return;
}
- map_page(address, phys, pgprot_val(flags));
+ map_kernel_page(address, phys, pgprot_val(flags));
fixmaps++;
}
-
-void __this_fixmap_does_not_exist(void)
-{
- WARN_ON(1);
-}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index db93cf7..0736e94 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -47,6 +47,7 @@
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
+#include <asm/trace.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/sections.h>
@@ -323,7 +324,7 @@ struct page *pud_page(pud_t pud)
*/
struct page *pmd_page(pmd_t pmd)
{
- if (pmd_trans_huge(pmd) || pmd_huge(pmd))
+ if (pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
return pte_page(pmd_pte(pmd));
return virt_to_page(pmd_page_vaddr(pmd));
}
@@ -351,12 +352,20 @@ static pte_t *get_from_cache(struct mm_struct *mm)
static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
{
void *ret = NULL;
- struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
- if (!page)
- return NULL;
- if (!kernel && !pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
+ struct page *page;
+
+ if (!kernel) {
+ page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
+ if (!page)
+ return NULL;
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ } else {
+ page = alloc_page(PGALLOC_GFP);
+ if (!page)
+ return NULL;
}
ret = page_address(page);
@@ -469,13 +478,39 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
* use of this partition ID was, not the new use.
*/
asm volatile("ptesync" : : : "memory");
- if (old & PATB_HR)
+ if (old & PATB_HR) {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- else
+ trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
+ } else {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
+ }
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mark_rodata_ro(void) /* entry point: checks MMU support, then dispatches to the radix or hash variant */
+{
+ if (!mmu_has_feature(MMU_FTR_KERNEL_RO)) { /* feature absent: warn and return without changing any mapping */
+ pr_warn("Warning: Unable to mark rodata read only on this CPU.\n");
+ return;
+ }
+
+ if (radix_enabled()) /* choose the implementation matching the active MMU mode */
+ radix__mark_rodata_ro();
+ else
+ hash__mark_rodata_ro();
+}
+
+void mark_initmem_nx(void) /* entry point: dispatches to the radix or hash variant; no MMU feature check is made here */
+{
+ if (radix_enabled()) /* choose the implementation matching the active MMU mode */
+ radix__mark_initmem_nx();
+ else
+ hash__mark_initmem_nx();
+}
+#endif
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 654a0d7..13cfe41 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -33,15 +33,7 @@ enum slb_index {
KSTACK_INDEX = 2, /* Kernel stack map */
};
-extern void slb_allocate_realmode(unsigned long ea);
-
-static void slb_allocate(unsigned long ea)
-{
- /* Currently, we do real mode for all SLBs including user, but
- * that will change if we bring back dynamic VSIDs
- */
- slb_allocate_realmode(ea);
-}
+extern void slb_allocate(unsigned long ea);
#define slb_esid_mask(ssize) \
(((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 1519617..bde3785 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -65,14 +65,15 @@ MMU_FTR_SECTION_ELSE \
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
-/* void slb_allocate_realmode(unsigned long ea);
+/* void slb_allocate(unsigned long ea);
*
* Create an SLB entry for the given EA (user or kernel).
* r3 = faulting address, r13 = PACA
* r9, r10, r11 are clobbered by this function
+ * r3 is preserved.
* No other registers are examined or changed.
*/
-_GLOBAL(slb_allocate_realmode)
+_GLOBAL(slb_allocate)
/*
* check for bad kernel/user address
* (ea & ~REGION_MASK) >= PGTABLE_RANGE
@@ -235,6 +236,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
* dont have any LRU information to help us choose a slot.
*/
+ mr r9,r3
+
+ /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */
7: ld r10,PACASTABRR(r13)
addi r10,r10,1
/* This gets soft patched on boot. */
@@ -249,10 +253,10 @@ slb_compare_rr_to_size:
std r10,PACASTABRR(r13)
3:
- rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
- oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */
+ rldimi r9,r10,0,36 /* r9 = EA[0:35] | entry */
+ oris r10,r9,SLB_ESID_V@h /* r10 = r9 | SLB_ESID_V */
- /* r3 = ESID data, r11 = VSID data */
+ /* r9 = ESID data, r11 = VSID data */
/*
* No need for an isync before or after this slbmte. The exception
@@ -265,21 +269,21 @@ slb_compare_rr_to_size:
bgelr cr7
/* Update the slb cache */
- lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
- cmpldi r3,SLB_CACHE_ENTRIES
+ lhz r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
+ cmpldi r9,SLB_CACHE_ENTRIES
bge 1f
/* still room in the slb cache */
- sldi r11,r3,2 /* r11 = offset * sizeof(u32) */
+ sldi r11,r9,2 /* r11 = offset * sizeof(u32) */
srdi r10,r10,28 /* get the 36 bits of the ESID */
add r11,r11,r13 /* r11 = (u32 *)paca + offset */
stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */
- addi r3,r3,1 /* offset++ */
+ addi r9,r9,1 /* offset++ */
b 2f
1: /* offset >= SLB_CACHE_ENTRIES */
- li r3,SLB_CACHE_ENTRIES+1
+ li r9,SLB_CACHE_ENTRIES+1
2:
- sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
+ sth r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
crclr 4*cr0+eq /* set result to "success" */
blr
@@ -301,11 +305,11 @@ slb_compare_rr_to_size:
rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */
/* r3 = EA, r11 = VSID data */
- clrrdi r3,r3,SID_SHIFT_1T /* clear out non-ESID bits */
+ clrrdi r9,r3,SID_SHIFT_1T /* clear out non-ESID bits */
b 7b
-_ASM_NOKPROBE_SYMBOL(slb_allocate_realmode)
+_ASM_NOKPROBE_SYMBOL(slb_allocate)
_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear)
_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io)
_ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size)
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 02e7140..744e016 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -16,6 +16,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+#include <asm/trace.h>
#define RIC_FLUSH_TLB 0
@@ -35,6 +36,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
/*
@@ -87,6 +89,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
static inline void _tlbiel_va(unsigned long va, unsigned long pid,
@@ -104,6 +107,7 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("ptesync": : :"memory");
+ trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
static inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -121,6 +125,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
/*
@@ -377,6 +382,7 @@ void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
EXPORT_SYMBOL(radix__flush_tlb_lpid_va);
@@ -394,6 +400,7 @@ void radix__flush_tlb_lpid(unsigned long lpid)
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
EXPORT_SYMBOL(radix__flush_tlb_lpid);
@@ -420,12 +427,14 @@ void radix__flush_tlb_all(void)
*/
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
/*
* now flush host entires by passing PRS = 0 and LPID == 0
*/
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(0, 0, rb, 0, ric, prs, r);
}
void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 4517aa4..b5b0fb9 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -93,12 +93,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
/*
* Check if we have an active batch on this CPU. If not, just
- * flush now and return. For now, we don global invalidates
- * in that case, might be worth testing the mm cpu mask though
- * and decide to use local invalidates instead...
+ * flush now and return.
*/
if (!batch->active) {
- flush_hash_page(vpn, rpte, psize, ssize, 0);
+ flush_hash_page(vpn, rpte, psize, ssize, mm_is_thread_local(mm));
put_cpu_var(ppc64_tlb_batch);
return;
}
OpenPOWER on IntegriCloud