From 1a92a80ad386a1a6e3b36d576d52a1a456394b70 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jul 2017 14:28:00 +1000 Subject: powerpc/mm: Ensure cpumask update is ordered There is no guarantee that the various isync's involved with the context switch will order the update of the CPU mask with the first TLB entry for the new context being loaded by the HW. Be safe here and add a memory barrier to order any subsequent load/store which may bring entries into the TLB. The corresponding barrier on the other side already exists as pte updates use pte_xchg() which uses __cmpxchg_u64 which has a sync after the atomic operation. Cc: stable@vger.kernel.org Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Nicholas Piggin [mpe: Add comments in the code] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 18 ++++++++++++++++++ arch/powerpc/include/asm/pgtable-be-types.h | 1 + arch/powerpc/include/asm/pgtable-types.h | 1 + 3 files changed, 20 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 0c76675..35bec1c 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -90,6 +90,24 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, /* Mark this context has been used on the new CPU */ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + + /* + * This full barrier orders the store to the cpumask above vs + * a subsequent operation which allows this CPU to begin loading + * translations for next. + * + * When using the radix MMU that operation is the load of the + * MMU context id, which is then moved to SPRN_PID. + * + * For the hash MMU it is either the first load from slb_cache + * in switch_slb(), and/or the store of paca->mm_ctx_id in + * copy_mm_to_paca(). + * + * On the read side the barrier is in pte_xchg(), which orders + * the store to the PTE vs the load of mm_cpumask. + */ + smp_mb(); + new_on_cpu = true; } diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h index 9c0f5db..67e7e3d9 100644 --- a/arch/powerpc/include/asm/pgtable-be-types.h +++ b/arch/powerpc/include/asm/pgtable-be-types.h @@ -87,6 +87,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new) unsigned long *p = (unsigned long *)ptep; __be64 prev; + /* See comment in switch_mm_irqs_off() */ prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pte_raw(old), (__force unsigned long)pte_raw(new)); diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index 8bd3b13..369a164 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -62,6 +62,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new) { unsigned long *p = (unsigned long *)ptep; + /* See comment in switch_mm_irqs_off() */ return pte_val(old) == __cmpxchg_u64(p, pte_val(old), pte_val(new)); } #endif -- cgit v1.1 From bd0fdb191c8523a9126bb14ac1b22cb47698ebf5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 13 Mar 2017 03:03:49 +1000 Subject: KVM: PPC: Book3S HV: Use msgsync with hypervisor doorbells on POWER9 When msgsnd is used for IPIs to other cores, msgsync must be executed by the target to order stores performed on the source before its msgsnd (provided the source executes the appropriate sync). Fixes: 1704a81ccebc ("KVM: PPC: Book3S HV: Use msgsnd for IPIs to other cores on POWER9") Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c52184a..9c9c983 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1291,6 +1291,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Hypervisor doorbell - exit only if host IPI flag set */ cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL bne 3f +BEGIN_FTR_SECTION + PPC_MSGSYNC +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lbz r0, HSTATE_HOST_IPI(r13) cmpwi r0, 0 beq 4f -- cgit v1.1 From 2c4fb78f78b6e420604ee1b05bdfb5c1d637869f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 18 Aug 2017 12:10:52 +1000 Subject: KVM: PPC: Book3S HV: Workaround POWER9 DD1.0 bug causing IPB bit loss This adds a workaround for a bug in POWER9 DD1 chips where changing the CPPR (Current Processor Priority Register) can cause bits in the IPB (Interrupt Pending Buffer) to get lost. Thankfully it only happens when manually manipulating CPPR which is quite rare. When it does happen it can cause interrupts to be delayed or lost. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive_template.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 4636ca6..150be86 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -16,7 +16,16 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc) u8 cppr; u16 ack; - /* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! */ + /* + * DD1 bug workaround: If PIPR is less favored than CPPR + * ignore the interrupt or we might incorrectly lose an IPB + * bit. + */ + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { + u8 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR); + if (pipr >= xc->hw_cppr) + return; + } /* Perform the acknowledge OS to register cycle. */ ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG)); -- cgit v1.1 From bb9b52bd51dcb17b965a30167d0812902c1b9927 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 18 Aug 2017 12:10:58 +1000 Subject: KVM: PPC: Book3S HV: Add missing barriers to XIVE code and document them This adds missing memory barriers to order updates/tests of the virtual CPPR and MFRR, thus fixing a lost IPI problem. While at it also document all barriers in this file. This fixes a bug causing guest IPIs to occasionally get lost. The symptom then is hangs or stalls in the guest. Signed-off-by: Benjamin Herrenschmidt Tested-by: Guilherme G. Piccoli Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive_template.c | 57 +++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 150be86..d1ed2c4 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -17,6 +17,12 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc) u16 ack; /* + * Ensure any previous store to CPPR is ordered vs. + * the subsequent loads from PIPR or ACK. + */ + eieio(); + + /* * DD1 bug workaround: If PIPR is less favored than CPPR * ignore the interrupt or we might incorrectly lose an IPB * bit. @@ -244,6 +250,11 @@ skip_ipi: /* * If we found an interrupt, adjust what the guest CPPR should * be as if we had just fetched that interrupt from HW. + * + * Note: This can only make xc->cppr smaller as the previous + * loop will only exit with hirq != 0 if prio is lower than + * the current xc->cppr. Thus we don't need to re-check xc->mfrr + * for pending IPIs. */ if (hirq) xc->cppr = prio; @@ -390,6 +401,12 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr) xc->cppr = cppr; /* + * Order the above update of xc->cppr with the subsequent + * read of xc->mfrr inside push_pending_to_hw() + */ + smp_mb(); + + /* * We are masking less, we need to look for pending things * to deliver and set VP pending bits accordingly to trigger * a new interrupt otherwise we might miss MFRR changes for @@ -429,21 +446,37 @@ X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr) * used to signal MFRR changes is EOId when fetched from * the queue. */ - if (irq == XICS_IPI || irq == 0) + if (irq == XICS_IPI || irq == 0) { + /* + * This barrier orders the setting of xc->cppr vs. + * subsquent test of xc->mfrr done inside + * scan_interrupts and push_pending_to_hw + */ + smp_mb(); goto bail; + } /* Find interrupt source */ sb = kvmppc_xive_find_source(xive, irq, &src); if (!sb) { pr_devel(" source not found !\n"); rc = H_PARAMETER; + /* Same as above */ + smp_mb(); goto bail; } state = &sb->irq_state[src]; kvmppc_xive_select_irq(state, &hw_num, &xd); state->in_eoi = true; - mb(); + + /* + * This barrier orders both setting of in_eoi above vs, + * subsequent test of guest_priority, and the setting + * of xc->cppr vs. subsquent test of xc->mfrr done inside + * scan_interrupts and push_pending_to_hw + */ + smp_mb(); again: if (state->guest_priority == MASKED) { @@ -470,6 +503,14 @@ again: } + /* + * This barrier orders the above guest_priority check + * and spin_lock/unlock with clearing in_eoi below. + * + * It also has to be a full mb() as it must ensure + * the MMIOs done in source_eoi() are completed before + * state->in_eoi is visible. + */ mb(); state->in_eoi = false; bail: @@ -504,6 +545,18 @@ X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server, /* Locklessly write over MFRR */ xc->mfrr = mfrr; + /* + * The load of xc->cppr below and the subsequent MMIO store + * to the IPI must happen after the above mfrr update is + * globally visible so that: + * + * - Synchronize with another CPU doing an H_EOI or a H_CPPR + * updating xc->cppr then reading xc->mfrr. + * + * - The target of the IPI sees the xc->mfrr update + */ + mb(); + /* Shoot the IPI if most favored than target cppr */ if (mfrr < xc->cppr) __x_writeq(0, __x_trig_page(&xc->vp_ipi_data)); -- cgit v1.1 From 47c5310a8dbe7c2cb9f0083daa43ceed76c257fa Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 24 Aug 2017 19:14:47 +1000 Subject: KVM: PPC: Book3S: Fix race and leak in kvm_vm_ioctl_create_spapr_tce() Nixiaoming pointed out that there is a memory leak in kvm_vm_ioctl_create_spapr_tce() if the call to anon_inode_getfd() fails; the memory allocated for the kvmppc_spapr_tce_table struct is not freed, and nor are the pages allocated for the iommu tables. In addition, we have already incremented the process's count of locked memory pages, and this doesn't get restored on error. David Hildenbrand pointed out that there is a race in that the function checks early on that there is not already an entry in the stt->iommu_tables list with the same LIOBN, but an entry with the same LIOBN could get added between then and when the new entry is added to the list. This fixes all three problems. To simplify things, we now call anon_inode_getfd() before placing the new entry in the list. The check for an existing entry is done while holding the kvm->lock mutex, immediately before adding the new entry to the list. Finally, on failure we now call kvmppc_account_memlimit to decrement the process's count of locked memory pages. Reported-by: Nixiaoming Reported-by: David Hildenbrand Signed-off-by: Paul Mackerras Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_64_vio.c | 56 ++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 22 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index a160c14..53766e2 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -294,32 +294,26 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce_64 *args) { struct kvmppc_spapr_tce_table *stt = NULL; + struct kvmppc_spapr_tce_table *siter; unsigned long npages, size; int ret = -ENOMEM; int i; + int fd = -1; if (!args->size) return -EINVAL; - /* Check this LIOBN hasn't been previously allocated */ - list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { - if (stt->liobn == args->liobn) - return -EBUSY; - } - size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); npages = kvmppc_tce_pages(size); ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); - if (ret) { - stt = NULL; - goto fail; - } + if (ret) + return ret; ret = -ENOMEM; stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), GFP_KERNEL); if (!stt) - goto fail; + goto fail_acct; stt->liobn = args->liobn; stt->page_shift = args->page_shift; @@ -334,24 +328,42 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, goto fail; } - kvm_get_kvm(kvm); + ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, + stt, O_RDWR | O_CLOEXEC); + if (ret < 0) + goto fail; mutex_lock(&kvm->lock); - list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); + + /* Check this LIOBN hasn't been previously allocated */ + ret = 0; + list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) { + if (siter->liobn == args->liobn) { + ret = -EBUSY; + break; + } + } + + if (!ret) { + list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); + kvm_get_kvm(kvm); + } mutex_unlock(&kvm->lock); - return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, - stt, O_RDWR | O_CLOEXEC); + if (!ret) + return fd; -fail: - if (stt) { - for (i = 0; i < npages; i++) - if (stt->pages[i]) - __free_page(stt->pages[i]); + put_unused_fd(fd); - kfree(stt); - } + fail: + for (i = 0; i < npages; i++) + if (stt->pages[i]) + __free_page(stt->pages[i]); + + kfree(stt); + fail_acct: + kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); return ret; } -- cgit v1.1 From d1d5762e4767324ce4ec32f9b2d2aaccfd87f664 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Thu, 31 Aug 2017 17:17:28 -0400 Subject: powerpc/powernv: update to new mmu_notifier semantic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calls to mmu_notifier_invalidate_page() were replaced by calls to mmu_notifier_invalidate_range() and now are bracketed by calls to mmu_notifier_invalidate_range_start()/end() Remove now useless invalidate_page callback. Signed-off-by: Jérôme Glisse Cc: linuxppc-dev@lists.ozlabs.org Cc: Alistair Popple Cc: Michael Ellerman Cc: Kirill A. Shutemov Cc: Andrew Morton Cc: Andrea Arcangeli Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/powernv/npu-dma.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index b5d960d..4c7b859 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -614,15 +614,6 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, mmio_invalidate(npu_context, 1, address, true); } -static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long address) -{ - struct npu_context *npu_context = mn_to_npu_context(mn); - - mmio_invalidate(npu_context, 1, address, true); -} - static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end) @@ -640,7 +631,6 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { .release = pnv_npu2_mn_release, .change_pte = pnv_npu2_mn_change_pte, - .invalidate_page = pnv_npu2_mn_invalidate_page, .invalidate_range = pnv_npu2_mn_invalidate_range, }; -- cgit v1.1 From fb1522e099f0c69f36655af233a64e3f55941f5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Thu, 31 Aug 2017 17:17:37 -0400 Subject: KVM: update to new mmu_notifier semantic v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calls to mmu_notifier_invalidate_page() were replaced by calls to mmu_notifier_invalidate_range() and are now bracketed by calls to mmu_notifier_invalidate_range_start()/end() Remove now useless invalidate_page callback. Changed since v1 (Linus Torvalds) - remove now useless kvm_arch_mmu_notifier_invalidate_page() Signed-off-by: Jérôme Glisse Tested-by: Mike Galbraith Tested-by: Adam Borowski Cc: Paolo Bonzini Cc: Radim Krčmář Cc: kvm@vger.kernel.org Cc: Kirill A. Shutemov Cc: Andrew Morton Cc: Andrea Arcangeli Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/kvm_host.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8b3f123..e372ed8 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -67,11 +67,6 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); -static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, - unsigned long address) -{ -} - #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 #define HPTEG_HASH_BITS_PTE_LONG 12 -- cgit v1.1