From a13cff318cafbd493b8d5d679e5f3f761084c4fe Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 23 Oct 2014 12:07:14 +0200 Subject: s390/mm: recfactor global pgste updates Replace the s390 specific page table walker for the pgste updates with a call to the common code walk_page_range function. There are now two pte modification functions, one for the reset of the CMMA state and another one for the initialization of the storage keys. Signed-off-by: Dominik Dingel Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 153 +++++++++++++++++-------------------------------- 1 file changed, 54 insertions(+), 99 deletions(-) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 1b79ca6..019afdf 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -834,99 +834,6 @@ static inline void page_table_free_pgste(unsigned long *table) __free_page(page); } -static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr, unsigned long end, bool init_skey) -{ - pte_t *start_pte, *pte; - spinlock_t *ptl; - pgste_t pgste; - - start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); - pte = start_pte; - do { - pgste = pgste_get_lock(pte); - pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; - if (init_skey) { - unsigned long address; - - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | - PGSTE_GR_BIT | PGSTE_GC_BIT); - - /* skip invalid and not writable pages */ - if (pte_val(*pte) & _PAGE_INVALID || - !(pte_val(*pte) & _PAGE_WRITE)) { - pgste_set_unlock(pte, pgste); - continue; - } - - address = pte_val(*pte) & PAGE_MASK; - page_set_storage_key(address, PAGE_DEFAULT_KEY, 1); - } - pgste_set_unlock(pte, pgste); - } while (pte++, addr += PAGE_SIZE, addr != end); - pte_unmap_unlock(start_pte, ptl); - - return addr; -} - -static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud, - unsigned long addr, unsigned long end, bool init_skey) -{ - unsigned long next; - pmd_t *pmd; - - pmd = pmd_offset(pud, addr); - do { - next = pmd_addr_end(addr, end); - if (pmd_none_or_clear_bad(pmd)) - continue; - next = page_table_reset_pte(mm, pmd, addr, next, init_skey); - } while (pmd++, addr = next, addr != end); - - return addr; -} - -static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd, - unsigned long addr, unsigned long end, bool init_skey) -{ - unsigned long next; - pud_t *pud; - - pud = pud_offset(pgd, addr); - do { - next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) - continue; - next = page_table_reset_pmd(mm, pud, addr, next, init_skey); - } while (pud++, addr = next, addr != end); - - return addr; -} - -void page_table_reset_pgste(struct mm_struct *mm, unsigned long start, - unsigned long end, bool init_skey) -{ - unsigned long addr, next; - pgd_t *pgd; - - down_write(&mm->mmap_sem); - if (init_skey && mm_use_skey(mm)) - goto out_up; - addr = start; - pgd = pgd_offset(mm, addr); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - next = page_table_reset_pud(mm, pgd, addr, next, init_skey); - } while (pgd++, addr = next, addr != end); - if (init_skey) - current->mm->context.use_skey = 1; -out_up: - up_write(&mm->mmap_sem); -} -EXPORT_SYMBOL(page_table_reset_pgste); - int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned long key, bool nq) { @@ -992,11 +899,6 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) return NULL; } -void page_table_reset_pgste(struct mm_struct *mm, unsigned long start, - unsigned long end, bool init_skey) -{ -} - static inline void page_table_free_pgste(unsigned long *table) { } @@ -1347,13 +1249,66 @@ EXPORT_SYMBOL_GPL(s390_enable_sie); * Enable storage key handling from now on and initialize the storage * keys with the default key. */ +static int __s390_enable_skey(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + unsigned long ptev; + pgste_t pgste; + + pgste = pgste_get_lock(pte); + /* Clear storage key */ + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | + PGSTE_GR_BIT | PGSTE_GC_BIT); + ptev = pte_val(*pte); + if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) + page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); + pgste_set_unlock(pte, pgste); + return 0; +} + void s390_enable_skey(void) { - page_table_reset_pgste(current->mm, 0, TASK_SIZE, true); + struct mm_walk walk = { .pte_entry = __s390_enable_skey }; + struct mm_struct *mm = current->mm; + + down_write(&mm->mmap_sem); + if (mm_use_skey(mm)) + goto out_up; + walk.mm = mm; + walk_page_range(0, TASK_SIZE, &walk); + mm->context.use_skey = 1; + +out_up: + up_write(&mm->mmap_sem); } EXPORT_SYMBOL_GPL(s390_enable_skey); /* + * Reset CMMA state, make all pages stable again. + */ +static int __s390_reset_cmma(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pgste_t pgste; + + pgste = pgste_get_lock(pte); + pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; + pgste_set_unlock(pte, pgste); + return 0; +} + +void s390_reset_cmma(struct mm_struct *mm) +{ + struct mm_walk walk = { .pte_entry = __s390_reset_cmma }; + + down_write(&mm->mmap_sem); + walk.mm = mm; + walk_page_range(0, TASK_SIZE, &walk); + up_write(&mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(s390_reset_cmma); + +/* * Test and reset if a guest page is dirty */ bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap) -- cgit v1.1 From 2faee8ff9dc6f4bfe46f6d2d110add858140fb20 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 23 Oct 2014 12:08:38 +0200 Subject: s390/mm: prevent and break zero page mappings in case of storage keys As soon as storage keys are enabled we need to stop working on zero page mappings to prevent inconsistencies between storage keys and pgste. Otherwise following data corruption could happen: 1) guest enables storage key 2) guest sets storage key for not mapped page X -> change goes to PGSTE 3) guest reads from page X -> as X was not dirty before, the page will be zero page backed, storage key from PGSTE for X will go to storage key for zero page 4) guest sets storage key for not mapped page Y (same logic as above 5) guest reads from page Y -> as Y was not dirty before, the page will be zero page backed, storage key from PGSTE for Y will got to storage key for zero page overwriting storage key for X While holding the mmap sem, we are safe against changes on entries we already fixed, as every fault would need to take the mmap_sem (read). Other vCPUs executing storage key instructions will get a one time interception and be serialized also with mmap_sem. Signed-off-by: Dominik Dingel Reviewed-by: Paolo Bonzini Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 019afdf..0f1e9ff6 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1256,6 +1256,15 @@ static int __s390_enable_skey(pte_t *pte, unsigned long addr, pgste_t pgste; pgste = pgste_get_lock(pte); + /* + * Remove all zero page mappings, + * after establishing a policy to forbid zero page mappings + * following faults for that page will get fresh anonymous pages + */ + if (is_zero_pfn(pte_pfn(*pte))) { + ptep_flush_direct(walk->mm, addr, pte); + pte_val(*pte) = _PAGE_INVALID; + } /* Clear storage key */ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT); @@ -1274,9 +1283,11 @@ void s390_enable_skey(void) down_write(&mm->mmap_sem); if (mm_use_skey(mm)) goto out_up; + + mm->context.use_skey = 1; + walk.mm = mm; walk_page_range(0, TASK_SIZE, &walk); - mm->context.use_skey = 1; out_up: up_write(&mm->mmap_sem); -- cgit v1.1 From 3ac8e38015d4fd1c12e4e048a01a9f059a2053a2 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 23 Oct 2014 12:09:17 +0200 Subject: s390/mm: disable KSM for storage key enabled pages When storage keys are enabled unmerge already merged pages and prevent new pages from being merged. Signed-off-by: Dominik Dingel Acked-by: Christian Borntraeger Reviewed-by: Paolo Bonzini Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 0f1e9ff6..b1871d3 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include @@ -1275,22 +1277,34 @@ static int __s390_enable_skey(pte_t *pte, unsigned long addr, return 0; } -void s390_enable_skey(void) +int s390_enable_skey(void) { struct mm_walk walk = { .pte_entry = __s390_enable_skey }; struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc = 0; down_write(&mm->mmap_sem); if (mm_use_skey(mm)) goto out_up; mm->context.use_skey = 1; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (ksm_madvise(vma, vma->vm_start, vma->vm_end, + MADV_UNMERGEABLE, &vma->vm_flags)) { + mm->context.use_skey = 0; + rc = -ENOMEM; + goto out_up; + } + } + mm->def_flags &= ~VM_MERGEABLE; walk.mm = mm; walk_page_range(0, TASK_SIZE, &walk); out_up: up_write(&mm->mmap_sem); + return rc; } EXPORT_SYMBOL_GPL(s390_enable_skey); -- cgit v1.1 From 7a5388de5c70f7a92de71e03ce72692c1827d162 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 22 Oct 2014 12:42:38 +0200 Subject: s390/kprobes: make use of NOKPROBE_SYMBOL() Use NOKPROBE_SYMBOL() instead of __kprobes annotation. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index a2b81d6..ca70fad 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -548,7 +548,7 @@ out: return fault; } -void __kprobes do_protection_exception(struct pt_regs *regs) +void do_protection_exception(struct pt_regs *regs) { unsigned long trans_exc_code; int fault; @@ -574,8 +574,9 @@ void __kprobes do_protection_exception(struct pt_regs *regs) if (unlikely(fault)) do_fault_error(regs, fault); } +NOKPROBE_SYMBOL(do_protection_exception); -void __kprobes do_dat_exception(struct pt_regs *regs) +void do_dat_exception(struct pt_regs *regs) { int access, fault; @@ -584,6 +585,7 @@ void __kprobes do_dat_exception(struct pt_regs *regs) if (unlikely(fault)) do_fault_error(regs, fault); } +NOKPROBE_SYMBOL(do_dat_exception); #ifdef CONFIG_PFAULT /* -- cgit v1.1 From 6972cae523de728ad5e8dae01da4a631d98b874c Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Wed, 15 Oct 2014 15:29:01 +0200 Subject: s390/mm: missing pte for gmap_ipte_notify should trigger a VM_BUG After fixup_user_fault does not fail we have a writeable pte. That pte might transform but it should not vanish. Signed-off-by: Dominik Dingel Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index b1871d3..9c8a6dd 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -752,8 +752,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) break; /* Walk the process page table, lock and get pte pointer */ ptep = get_locked_pte(gmap->mm, addr, &ptl); - if (unlikely(!ptep)) - continue; + VM_BUG_ON(!ptep); /* Set notification bit in the pgste of the pte */ entry = *ptep; if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { -- cgit v1.1 From a697e051160390065393e2926d9d080077239e9e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 30 Oct 2014 10:55:37 +0100 Subject: s390/mm: use correct unlock function in gmap_ipte_notify The page table lock is acquired with a call to get_locked_pte, replace the plain spin_unlock with the correct unlock function pte_unmap_unlock. Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 9c8a6dd..71c7eff 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -762,7 +762,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) gaddr += PAGE_SIZE; len -= PAGE_SIZE; } - spin_unlock(ptl); + pte_unmap_unlock(ptep, ptl); } up_read(&gmap->mm->mmap_sem); return rc; -- cgit v1.1 From 413d404768256eda4e13fdfce753fe2bbff2fcaf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 19 Nov 2014 13:31:08 +0100 Subject: s390/traps: print interrupt code and instruction length code It always confuses me to see the mixed instruction length code and interruption code on user space faults, while the message clearly says it is the interruption code. So split the value and print both values separately. Also add the ILC output to the die() message, so thar user and kernel space faults contain the same information. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index ca70fad..811937b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -261,8 +261,8 @@ static inline void report_user_fault(struct pt_regs *regs, long signr) return; if (!printk_ratelimit()) return; - printk(KERN_ALERT "User process fault: interruption code 0x%X ", - regs->int_code); + printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d", + regs->int_code & 0xffff, regs->int_code >> 17); print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN); printk(KERN_CONT "\n"); printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", -- cgit v1.1