From 4fcf361dbdbdb43038bb173e2391c4073e713745 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 20 Nov 2017 14:17:53 +1100 Subject: KVM: PPC: Book3S HV: Remove useless statement This removes a statement that has no effect. It should have been removed in commit 898b25b202f3 ("KVM: PPC: Book3S HV: Simplify dynamic micro-threading code", 2017-06-22) along with the loop over the piggy-backed virtual cores. This issue was reported by Coverity. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc/kvm/book3s_hv.c') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2d46037..597498d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2831,7 +2831,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ if (!thr0_done) kvmppc_start_thread(NULL, pvc); - thr += pvc->num_threads; } /* -- cgit v1.1 From c0093f1a38a0fd6c32a2269f0533bb13fb95143d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 20 Nov 2017 16:12:25 +1100 Subject: KVM: PPC: Book3S HV: Fix conditions for starting vcpu This corrects the test that determines whether a vcpu that has just become able to run in the guest (e.g. it has just finished handling a hypercall or hypervisor page fault) and whose virtual core is already running somewhere as a "piggybacked" vcore can start immediately or not. (A piggybacked vcore is one which is executing along with another vcore as a result of dynamic micro-threading.) Previously the test tried to lock the piggybacked vcore using spin_trylock, which would always fail because the vcore was already locked, and so the vcpu would have to wait until its vcore exited the guest before it could enter. In fact the vcpu can enter if its vcore is in VCORE_PIGGYBACK state and not already exiting (or exited) the guest, so the test in VCORE_PIGGYBACK state is basically the same as for VCORE_RUNNING state. 
Coverity detected this as a double unlock issue, which it isn't because the spin_trylock would always fail. This will fix the apparent double unlock as well. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kvm/book3s_hv.c') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 597498d..c4f0beb 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3175,17 +3175,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * this thread straight away and have it join in. */ if (!signal_pending(current)) { - if (vc->vcore_state == VCORE_PIGGYBACK) { - if (spin_trylock(&vc->lock)) { - if (vc->vcore_state == VCORE_RUNNING && - !VCORE_IS_EXITING(vc)) { - kvmppc_create_dtl_entry(vcpu, vc); - kvmppc_start_thread(vcpu, vc); - trace_kvm_guest_enter(vcpu); - } - spin_unlock(&vc->lock); - } - } else if (vc->vcore_state == VCORE_RUNNING && + if ((vc->vcore_state == VCORE_PIGGYBACK || + vc->vcore_state == VCORE_RUNNING) && !VCORE_IS_EXITING(vc)) { kvmppc_create_dtl_entry(vcpu, vc); kvmppc_start_thread(vcpu, vc); -- cgit v1.1 From 5855564c8ab2d9cefca7b2933bd19818eb795e40 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 12 Jan 2018 20:55:20 +1100 Subject: KVM: PPC: Book3S HV: Enable migration of decrementer register This adds a register identifier for use with the one_reg interface to allow the decrementer expiry time to be read and written by userspace. The decrementer expiry time is in guest timebase units and is equal to the sum of the decrementer and the guest timebase. (The expiry time is used rather than the decrementer value itself because the expiry time is not constantly changing, though the decrementer value is, while the guest vcpu is not running.) Without this, a guest vcpu migrated to a new host will see its decrementer set to some random value. 
On POWER8 and earlier, the decrementer is 32 bits wide and counts down at 512MHz, so the guest vcpu will potentially see no decrementer interrupts for up to about 4 seconds, which will lead to a stall. With POWER9, the decrementer is now 56 bits wide, so the stall can be much longer (up to 2.23 years) and more noticeable. To help work around the problem in cases where userspace has not been updated to migrate the decrementer expiry time, we now set the default decrementer expiry at vcpu creation time to the current time rather than the maximum possible value. This should mean an immediate decrementer interrupt when a migrated vcpu starts running. In cases where the decrementer is 32 bits wide and more than 4 seconds elapse between the creation of the vcpu and when it first runs, the decrementer would have wrapped around to positive values and there may still be a stall - but this is no worse than the current situation. In the large-decrementer case, we are sure to get an immediate decrementer interrupt (assuming the time from vcpu creation to first run is less than 2.23 years) and we thus avoid a very long stall. 
Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc/kvm/book3s_hv.c') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c4f0beb..b2d448c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1497,6 +1497,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_ARCH_COMPAT: *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); break; + case KVM_REG_PPC_DEC_EXPIRY: + *val = get_reg_val(id, vcpu->arch.dec_expires + + vcpu->arch.vcore->tb_offset); + break; default: r = -EINVAL; break; @@ -1724,6 +1728,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_ARCH_COMPAT: r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); break; + case KVM_REG_PPC_DEC_EXPIRY: + vcpu->arch.dec_expires = set_reg_val(id, *val) - + vcpu->arch.vcore->tb_offset; + break; default: r = -EINVAL; break; -- cgit v1.1 From 00608e1f007e4cf6031485c5630e0e504bceef9b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 11 Jan 2018 16:54:26 +1100 Subject: KVM: PPC: Book3S HV: Allow HPT and radix on the same core for POWER9 v2.2 POWER9 chip versions starting with "Nimbus" v2.2 can support running with some threads of a core in HPT mode and others in radix mode. This means that we don't have to prohibit independent-threads mode when running a HPT guest on a radix host, and we don't have to do any of the synchronization between threads that was introduced in commit c01015091a77 ("KVM: PPC: Book3S HV: Run HPT guests on POWER9 radix hosts", 2017-10-19). Rather than using up another CPU feature bit, we just do an explicit test on the PVR (processor version register) at module startup time to determine whether we have to take steps to avoid having some threads in HPT mode and some in radix mode (so-called "mixed mode"). 
We test for "Nimbus" (indicated by 0 or 1 in the top nibble of the lower 16 bits) v2.2 or later, or "Cumulus" (indicated by 2 or 3 in that nibble) v1.1 or later. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kvm/book3s_hv.c') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index b2d448c..76cf480 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -118,6 +118,9 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); #endif +/* If set, the threads on each CPU core have to be in the same MMU mode */ +static bool no_mixing_hpt_and_radix; + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -2386,8 +2389,8 @@ static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc) static bool subcore_config_ok(int n_subcores, int n_threads) { /* - * POWER9 "SMT4" cores are permanently in what is effectively a 4-way split-core - * mode, with one thread per subcore. + * POWER9 "SMT4" cores are permanently in what is effectively a 4-way + * split-core mode, with one thread per subcore. */ if (cpu_has_feature(CPU_FTR_ARCH_300)) return n_subcores <= 4 && n_threads == 1; @@ -2423,8 +2426,8 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) if (!cpu_has_feature(CPU_FTR_ARCH_207S)) return false; - /* POWER9 currently requires all threads to be in the same MMU mode */ - if (cpu_has_feature(CPU_FTR_ARCH_300) && + /* Some POWER9 chips require all threads to be in the same MMU mode */ + if (no_mixing_hpt_and_radix && kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm)) return false; @@ -2687,9 +2690,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) * threads are offline. 
Also check if the number of threads in this * guest are greater than the current system threads per guest. * On POWER9, we need to be not in independent-threads mode if - * this is a HPT guest on a radix host. + * this is a HPT guest on a radix host machine where the + * CPU threads may not be in different MMU modes. */ - hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm); + hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() && + !kvm_is_radix(vc->kvm); if (((controlled_threads > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) || (hpt_on_radix && vc->kvm->arch.threads_indep)) { @@ -4446,6 +4451,19 @@ static int kvmppc_book3s_init_hv(void) if (kvmppc_radix_possible()) r = kvmppc_radix_init(); + + /* + * POWER9 chips before version 2.02 can't have some threads in + * HPT mode and some in radix mode on the same core. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + unsigned int pvr = mfspr(SPRN_PVR); + if ((pvr >> 16) == PVR_POWER9 && + (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) || + ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101))) + no_mixing_hpt_and_radix = true; + } + return r; } -- cgit v1.1 From 2267ea7661798a42f0da648a2970e2a03f4bc370 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 12 Jan 2018 13:37:13 +1100 Subject: KVM: PPC: Book3S HV: Don't use existing "prodded" flag for XIVE escalations The prodded flag is only cleared at the beginning of H_CEDE, so every time we have an escalation, we will cause the *next* H_CEDE to return immediately. Instead use a dedicated "irq_pending" flag to indicate that a guest interrupt is pending for the VCPU. We don't reuse the existing exception bitmap so as to avoid expensive atomic ops. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kvm/book3s_hv.c') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 76cf480..e5f81fc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2999,7 +2999,7 @@ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) { if (!xive_enabled()) return false; - return vcpu->arch.xive_saved_state.pipr < + return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr < vcpu->arch.xive_saved_state.cppr; } #else -- cgit v1.1