From d4cb11340be6a1613d40d2b546cb111ea2547066 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Wed, 29 Jan 2014 16:02:32 +0100 Subject: KVM: s390: Clear storage keys page_table_reset_pgste() already does a complete page table walk to reset the pgste. Enhance it to initialize the storage keys to PAGE_DEFAULT_KEY if requested by the caller. This will be used for lazy storage key handling. Also provide an empty stub for !CONFIG_PGSTE. Let's adapt the current code (diag 308) to not clear the keys. Signed-off-by: Dominik Dingel Acked-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index d7cfd57..be80f55 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -883,8 +883,8 @@ static inline void page_table_free_pgste(unsigned long *table) __free_page(page); } -static inline unsigned long page_table_reset_pte(struct mm_struct *mm, - pmd_t *pmd, unsigned long addr, unsigned long end) +static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, unsigned long end, bool init_skey) { pte_t *start_pte, *pte; spinlock_t *ptl; @@ -895,6 +895,22 @@ static inline unsigned long page_table_reset_pte(struct mm_struct *mm, do { pgste = pgste_get_lock(pte); pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; + if (init_skey) { + unsigned long address; + + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | + PGSTE_GR_BIT | PGSTE_GC_BIT); + + /* skip invalid and not writable pages */ + if (pte_val(*pte) & _PAGE_INVALID || + !(pte_val(*pte) & _PAGE_WRITE)) { + pgste_set_unlock(pte, pgste); + continue; + } + + address = pte_val(*pte) & PAGE_MASK; + page_set_storage_key(address, PAGE_DEFAULT_KEY, 1); + } pgste_set_unlock(pte, pgste); } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(start_pte, ptl); @@ 
-902,8 +918,8 @@ static inline unsigned long page_table_reset_pte(struct mm_struct *mm, return addr; } -static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, - pud_t *pud, unsigned long addr, unsigned long end) +static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end, bool init_skey) { unsigned long next; pmd_t *pmd; @@ -913,14 +929,14 @@ static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, next = pmd_addr_end(addr, end); if (pmd_none_or_clear_bad(pmd)) continue; - next = page_table_reset_pte(mm, pmd, addr, next); + next = page_table_reset_pte(mm, pmd, addr, next, init_skey); } while (pmd++, addr = next, addr != end); return addr; } -static inline unsigned long page_table_reset_pud(struct mm_struct *mm, - pgd_t *pgd, unsigned long addr, unsigned long end) +static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, bool init_skey) { unsigned long next; pud_t *pud; @@ -930,14 +946,14 @@ static inline unsigned long page_table_reset_pud(struct mm_struct *mm, next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; - next = page_table_reset_pmd(mm, pud, addr, next); + next = page_table_reset_pmd(mm, pud, addr, next, init_skey); } while (pud++, addr = next, addr != end); return addr; } -void page_table_reset_pgste(struct mm_struct *mm, - unsigned long start, unsigned long end) +void page_table_reset_pgste(struct mm_struct *mm, unsigned long start, + unsigned long end, bool init_skey) { unsigned long addr, next; pgd_t *pgd; @@ -949,7 +965,7 @@ void page_table_reset_pgste(struct mm_struct *mm, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - next = page_table_reset_pud(mm, pgd, addr, next); + next = page_table_reset_pud(mm, pgd, addr, next, init_skey); } while (pgd++, addr = next, addr != end); up_read(&mm->mmap_sem); } @@ -1011,6 +1027,11 @@ static inline unsigned 
long *page_table_alloc_pgste(struct mm_struct *mm, return NULL; } +void page_table_reset_pgste(struct mm_struct *mm, unsigned long start, + unsigned long end, bool init_skey) +{ +} + static inline void page_table_free_pgste(unsigned long *table) { } -- cgit v1.1 From 934bc131efc3e4be6a52f7dd6c4dbf99635e381a Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Tue, 14 Jan 2014 18:10:17 +0100 Subject: KVM: s390: Allow skeys to be enabled for the current process Introduce a new function s390_enable_skey(), which enables storage key handling via setting the use_skey flag in the mmu context. This function is only useful within the context of kvm. Note that enabling storage keys will cause a one-time hiccup when walking the page table; however, it saves us special effort for cases like clear reset while making it possible for us to be architecture conformant. s390_enable_skey() takes the page table lock to prevent resetting storage keys triggered from multiple vcpus. Signed-off-by: Dominik Dingel Acked-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index be80f55..02a8607 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1378,6 +1378,29 @@ int s390_enable_sie(void) } EXPORT_SYMBOL_GPL(s390_enable_sie); +/* + * Enable storage key handling from now on and initialize the storage + * keys with the default key. + */ +void s390_enable_skey(void) +{ + /* + * To avoid races between multiple vcpus, ending in calling + * page_table_reset twice or more, + * the page_table_lock is taken for serialization. 
+ */ + spin_lock(&current->mm->page_table_lock); + if (mm_use_skey(current->mm)) { + spin_unlock(&current->mm->page_table_lock); + return; + } + + current->mm->context.use_skey = 1; + spin_unlock(&current->mm->page_table_lock); + page_table_reset_pgste(current->mm, 0, TASK_SIZE, true); +} +EXPORT_SYMBOL_GPL(s390_enable_skey); + #ifdef CONFIG_TRANSPARENT_HUGEPAGE int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) -- cgit v1.1 From 0a61b222df75a6a69dc34816f7db2f61fee8c935 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 18 Oct 2013 12:03:41 +0200 Subject: KVM: s390/mm: use software dirty bit detection for user dirty tracking Switch the user dirty bit detection used for migration from the hardware provided host change-bit in the pgste to a fault based detection method. This reduces the dependency of the host on the storage key to a point where it becomes possible to enable the RCP bypass for KVM guests. The fault based dirty detection will only indicate changes caused by accesses via the guest address space. The hardware based method can detect all changes, even those caused by I/O or accesses via the kernel page table. The KVM/qemu code needs to take this into account. 
Signed-off-by: Martin Schwidefsky Signed-off-by: Dominik Dingel Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 02a8607..1ddf975 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -832,6 +832,7 @@ void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte) } spin_unlock(&gmap_notifier_lock); } +EXPORT_SYMBOL_GPL(gmap_do_ipte_notify); static inline int page_table_with_pgste(struct page *page) { @@ -864,8 +865,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, atomic_set(&page->_mapcount, 0); table = (unsigned long *) page_to_phys(page); clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); - clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT, - PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); return table; } @@ -1005,7 +1005,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, /* changing the guest storage key is considered a change of the page */ if ((pgste_val(new) ^ pgste_val(old)) & (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) - pgste_val(new) |= PGSTE_HC_BIT; + pgste_val(new) |= PGSTE_UC_BIT; pgste_set_unlock(ptep, new); pte_unmap_unlock(*ptep, ptl); -- cgit v1.1 From a0bf4f149bbfa2e31b5f4172c817afdb7b986733 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Mon, 24 Mar 2014 14:27:58 +0100 Subject: KVM: s390/mm: new gmap_test_and_clear_dirty function For live migration, kvm needs to test and clear the dirty bit of guest pages. ptep_test_and_clear_user_dirty exists for that purpose; to be sure we are not racing with other code, we protect the pte. This needs to be done within the architecture memory management code. 
Signed-off-by: Dominik Dingel Acked-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 1ddf975..ea4a31b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1401,6 +1401,27 @@ void s390_enable_skey(void) } EXPORT_SYMBOL_GPL(s390_enable_skey); +/* + * Test and reset if a guest page is dirty + */ +bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap) +{ + pte_t *pte; + spinlock_t *ptl; + bool dirty = false; + + pte = get_locked_pte(gmap->mm, address, &ptl); + if (unlikely(!pte)) + return false; + + if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte)) + dirty = true; + + spin_unlock(ptl); + return dirty; +} +EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty); + #ifdef CONFIG_TRANSPARENT_HUGEPAGE int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) -- cgit v1.1 From 3a801517ad49f586f2016e1b1321e6cd28a97a04 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 16 May 2014 10:34:11 +0200 Subject: KVM: s390: correct locking for s390_enable_skey Use the mm semaphore to serialize multiple invocations of s390_enable_skey. The second CPU faulting on a storage key operation needs to wait for the completion of the page table update. Taking the mm semaphore writable has the positive side-effect that it prevents any host faults from taking place which does have implications on keys vs PGSTE. 
Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index ea4a31b..66ba60c 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -958,8 +958,10 @@ void page_table_reset_pgste(struct mm_struct *mm, unsigned long start, unsigned long addr, next; pgd_t *pgd; + down_write(&mm->mmap_sem); + if (init_skey && mm_use_skey(mm)) + goto out_up; addr = start; - down_read(&mm->mmap_sem); pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); @@ -967,7 +969,10 @@ void page_table_reset_pgste(struct mm_struct *mm, unsigned long start, continue; next = page_table_reset_pud(mm, pgd, addr, next, init_skey); } while (pgd++, addr = next, addr != end); - up_read(&mm->mmap_sem); + if (init_skey) + current->mm->context.use_skey = 1; +out_up: + up_write(&mm->mmap_sem); } EXPORT_SYMBOL(page_table_reset_pgste); @@ -1384,19 +1389,6 @@ EXPORT_SYMBOL_GPL(s390_enable_sie); */ void s390_enable_skey(void) { - /* - * To avoid races between multiple vcpus, ending in calling - * page_table_reset twice or more, - * the page_table_lock is taken for serialization. - */ - spin_lock(&current->mm->page_table_lock); - if (mm_use_skey(current->mm)) { - spin_unlock(&current->mm->page_table_lock); - return; - } - - current->mm->context.use_skey = 1; - spin_unlock(&current->mm->page_table_lock); page_table_reset_pgste(current->mm, 0, TASK_SIZE, true); } EXPORT_SYMBOL_GPL(s390_enable_skey); -- cgit v1.1