From d222af072380c4470295c07d84ecb15f4937e365 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 6 Sep 2017 15:20:55 +1000 Subject: KVM: PPC: Book3S HV: Don't access XIVE PIPR register using byte accesses The XIVE interrupt controller on POWER9 machines doesn't support byte accesses to any register in the thread management area other than the CPPR (current processor priority register). In particular, when reading the PIPR (pending interrupt priority register), we need to do a 32-bit or 64-bit load. Cc: stable@vger.kernel.org # v4.13 Fixes: 2c4fb78f78b6 ("KVM: PPC: Book3S HV: Workaround POWER9 DD1.0 bug causing IPB bit loss") Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rm_xive.c | 1 - arch/powerpc/kvm/book3s_xive.c | 1 - arch/powerpc/kvm/book3s_xive_template.c | 7 ++++--- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c index abf5f01..5b81a80 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xive.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xive.c @@ -38,7 +38,6 @@ static inline void __iomem *get_tima_phys(void) #define __x_tima get_tima_phys() #define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_page)) #define __x_trig_page(xd) ((void __iomem *)((xd)->trig_page)) -#define __x_readb __raw_rm_readb #define __x_writeb __raw_rm_writeb #define __x_readw __raw_rm_readw #define __x_readq __raw_rm_readq diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 08b200a..1330462 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -48,7 +48,6 @@ #define __x_tima xive_tima #define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_mmio)) #define __x_trig_page(xd) ((void __iomem *)((xd)->trig_mmio)) -#define __x_readb __raw_readb #define __x_writeb __raw_writeb #define __x_readw __raw_readw #define __x_readq __raw_readq diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index d1ed2c4..c7a5dea 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -28,7 +28,8 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc) * bit. */ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - u8 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR); + __be64 qw1 = __x_readq(__x_tima + TM_QW1_OS); + u8 pipr = be64_to_cpu(qw1) & 0xff; if (pipr >= xc->hw_cppr) return; } @@ -336,7 +337,6 @@ X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; u8 pending = xc->pending; u32 hirq; - u8 pipr; pr_devel("H_IPOLL(server=%ld)\n", server); @@ -353,7 +353,8 @@ X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long pending = 0xff; } else { /* Grab pending interrupt if any */ - pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR); + __be64 qw1 = __x_readq(__x_tima + TM_QW1_OS); + u8 pipr = be64_to_cpu(qw1) & 0xff; if (pipr < 8) pending |= 1 << pipr; } -- cgit v1.1 From cf5f6f3125241853462334b1bc696f3c3c492178 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 11 Sep 2017 16:05:30 +1000 Subject: KVM: PPC: Book3S HV: Hold kvm->lock around call to kvmppc_update_lpcr Commit 468808bd35c4 ("KVM: PPC: Book3S HV: Set process table for HPT guests on POWER9", 2017-01-30) added a call to kvmppc_update_lpcr() which doesn't hold the kvm->lock mutex around the call, as required. This adds the lock/unlock pair, and for good measure, includes the kvmppc_setup_partition_table() call in the locked region, since it is altering global state of the VM. This error appears not to have any fatal consequences for the host; the consequences would be that the VCPUs could end up running with different LPCR values, or an update to the LPCR value by userspace using the one_reg interface could get overwritten, or the update done by kvmhv_configure_mmu() could get overwritten. Cc: stable@vger.kernel.org # v4.10+ Fixes: 468808bd35c4 ("KVM: PPC: Book3S HV: Set process table for HPT guests on POWER9") Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 18e974a..a7177c2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4212,11 +4212,13 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg) if ((cfg->process_table & PRTS_MASK) > 24) return -EINVAL; + mutex_lock(&kvm->lock); kvm->arch.process_table = cfg->process_table; kvmppc_setup_partition_table(kvm); lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0; kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE); + mutex_unlock(&kvm->lock); return 0; } -- cgit v1.1 From 67f8a8c1151c9ef3d1285905d1e66ebb769ecdf7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 12 Sep 2017 13:47:23 +1000 Subject: KVM: PPC: Book3S HV: Fix bug causing host SLB to be restored incorrectly Aneesh Kumar reported seeing host crashes when running recent kernels on POWER8. The symptom was an oops like this: Unable to handle kernel paging request for data at address 0xf00000000786c620 Faulting instruction address: 0xc00000000030e1e4 Oops: Kernel access of bad area, sig: 11 [#1] LE SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: powernv_op_panel CPU: 24 PID: 6663 Comm: qemu-system-ppc Tainted: G W 4.13.0-rc7-43932-gfc36c59 #2 task: c000000fdeadfe80 task.stack: c000000fdeb68000 NIP: c00000000030e1e4 LR: c00000000030de6c CTR: c000000000103620 REGS: c000000fdeb6b450 TRAP: 0300 Tainted: G W (4.13.0-rc7-43932-gfc36c59) MSR: 9000000000009033 CR: 24044428 XER: 20000000 CFAR: c00000000030e134 DAR: f00000000786c620 DSISR: 40000000 SOFTE: 0 GPR00: 0000000000000000 c000000fdeb6b6d0 c0000000010bd000 000000000000e1b0 GPR04: c00000000115e168 c000001fffa6e4b0 c00000000115d000 c000001e1b180386 GPR08: f000000000000000 c000000f9a8913e0 f00000000786c600 00007fff587d0000 GPR12: c000000fdeb68000 c00000000fb0f000 0000000000000001 00007fff587cffff GPR16: 0000000000000000 c000000000000000 00000000003fffff c000000fdebfe1f8 GPR20: 0000000000000004 c000000fdeb6b8a8 0000000000000001 0008000000000040 GPR24: 07000000000000c0 00007fff587cffff c000000fdec20bf8 00007fff587d0000 GPR28: c000000fdeca9ac0 00007fff587d0000 00007fff587c0000 00007fff587d0000 NIP [c00000000030e1e4] __get_user_pages_fast+0x434/0x1070 LR [c00000000030de6c] __get_user_pages_fast+0xbc/0x1070 Call Trace: [c000000fdeb6b6d0] [c00000000139dab8] lock_classes+0x0/0x35fe50 (unreliable) [c000000fdeb6b7e0] [c00000000030ef38] get_user_pages_fast+0xf8/0x120 [c000000fdeb6b830] [c000000000112318] kvmppc_book3s_hv_page_fault+0x308/0xf30 [c000000fdeb6b960] [c00000000010e10c] kvmppc_vcpu_run_hv+0xfdc/0x1f00 [c000000fdeb6bb20] [c0000000000e915c] kvmppc_vcpu_run+0x2c/0x40 [c000000fdeb6bb40] [c0000000000e5650] kvm_arch_vcpu_ioctl_run+0x110/0x300 [c000000fdeb6bbe0] [c0000000000d6468] kvm_vcpu_ioctl+0x528/0x900 [c000000fdeb6bd40] [c0000000003bc04c] do_vfs_ioctl+0xcc/0x950 [c000000fdeb6bde0] [c0000000003bc930] SyS_ioctl+0x60/0x100 [c000000fdeb6be30] [c00000000000b96c] system_call+0x58/0x6c Instruction dump: 7ca81a14 2fa50000 41de0010 7cc8182a 68c60002 78c6ffe2 0b060000 3cc2000a 794a3664 390610d8 e9080000 7d485214 7d435378 790507e1 408202f0 ---[ end trace fad4a342d0414aa2 ]--- It turns out that what has happened is that the SLB entry for the vmmemap region hasn't been reloaded on exit from a guest, and it has the wrong page size. Then, when the host next accesses the vmemmap region, it gets a page fault. Commit a25bd72badfa ("powerpc/mm/radix: Workaround prefetch issue with KVM", 2017-07-24) modified the guest exit code so that it now only clears out the SLB for hash guest. The code tests the radix flag and puts the result in a non-volatile CR field, CR2, and later branches based on CR2. Unfortunately, the kvmppc_save_tm function, which gets called between those two points, modifies all the user-visible registers in the case where the guest was in transactional or suspended state, except for a few which it restores (namely r1, r2, r9 and r13). Thus the hash/radix indication in CR2 gets corrupted. This fixes the problem by re-doing the comparison just before the result is needed. For good measure, this also adds comments next to the call sites of kvmppc_save_tm and kvmppc_restore_tm pointing out that non-volatile register state will be lost. Cc: stable@vger.kernel.org # v4.13 Fixes: a25bd72badfa ("powerpc/mm/radix: Workaround prefetch issue with KVM") Tested-by: Aneesh Kumar K.V Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 663a4a8..17936f8 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -771,6 +771,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BEGIN_FTR_SECTION + /* + * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR + */ bl kvmppc_restore_tm END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif @@ -1630,6 +1633,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BEGIN_FTR_SECTION + /* + * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR + */ bl kvmppc_save_tm END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif @@ -1749,7 +1755,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) /* * Are we running hash or radix ? */ - beq cr2,3f + ld r5, VCPU_KVM(r9) + lbz r0, KVM_RADIX(r5) + cmpwi cr2, r0, 0 + beq cr2, 3f /* Radix: Handle the case where the guest used an illegal PID */ LOAD_REG_ADDR(r4, mmu_base_pid) @@ -2466,6 +2475,9 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BEGIN_FTR_SECTION + /* + * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR + */ ld r9, HSTATE_KVM_VCPU(r13) bl kvmppc_save_tm END_FTR_SECTION_IFSET(CPU_FTR_TM) @@ -2578,6 +2590,9 @@ kvm_end_cede: #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BEGIN_FTR_SECTION + /* + * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR + */ bl kvmppc_restore_tm END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif -- cgit v1.1 From 267ad7bc2d3f69af536035b6a3e4a9a2b6ae11dc Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 13 Sep 2017 13:08:23 -0700 Subject: kvm,powerpc: Serialize wq active checks in ops->vcpu_kick Particularly because kvmppc_fast_vcpu_kick_hv() is a callback, ensure that we properly serialize wq active checks in order to avoid potentially missing a wakeup due to racing with the waiter side. Signed-off-by: Davidlohr Bueso Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a7177c2..73bf1eb 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -181,7 +181,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) struct swait_queue_head *wqp; wqp = kvm_arch_vcpu_wq(vcpu); - if (swait_active(wqp)) { + if (swq_has_sleeper(wqp)) { swake_up(wqp); ++vcpu->stat.halt_wakeup; } -- cgit v1.1