diff options
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/cpuid.c | 32 | ||||
-rw-r--r-- | arch/x86/kvm/hyperv.c | 15 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 16 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 70 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 31 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 30 |
7 files changed, 127 insertions, 68 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 72d8c49..5720e78 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -379,7 +379,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(IBPB) | F(IBRS); + F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | + F(AMD_SSB_NO); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -409,7 +410,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | - F(ARCH_CAPABILITIES); + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -496,6 +497,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= ~F(PKU); entry->edx &= kvm_cpuid_7_0_edx_x86_features; cpuid_mask(&entry->edx, CPUID_7_EDX); + /* + * We emulate ARCH_CAPABILITIES in software even + * if the host doesn't support it. + */ + entry->edx |= F(ARCH_CAPABILITIES); } else { entry->ebx = 0; entry->ecx = 0; @@ -648,13 +654,25 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; - /* IBRS and IBPB aren't necessarily present in hardware cpuid */ - if (boot_cpu_has(X86_FEATURE_IBPB)) - entry->ebx |= F(IBPB); - if (boot_cpu_has(X86_FEATURE_IBRS)) - entry->ebx |= F(IBRS); + /* + * IBRS, IBPB and VIRT_SSBD aren't necessarily present in + * hardware cpuid + */ + if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) + entry->ebx |= F(AMD_IBPB); + if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) + entry->ebx |= F(AMD_IBRS); + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + entry->ebx |= F(VIRT_SSBD); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); + /* + * The preference is to use SPEC CTRL MSR instead of the + * VIRT_SPEC MSR. + */ + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) && + !boot_cpu_has(X86_FEATURE_AMD_SSBD)) + entry->ebx |= F(VIRT_SSBD); break; } case 0x80000019: diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 14e0d0a..af8caf96 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1375,14 +1375,18 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) } } -static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) +static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) { - struct kvm_run *run = vcpu->run; - - kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result); + kvm_hv_hypercall_set_result(vcpu, result); + ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); } +static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) +{ + return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result); +} + static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) { struct eventfd_ctx *eventfd; @@ -1527,8 +1531,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) break; } - kvm_hv_hypercall_set_result(vcpu, ret); - return 1; + return kvm_hv_hypercall_complete(vcpu, ret); } void kvm_hv_init_vm(struct kvm *kvm) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 776391c..b5cd846 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1522,11 +1522,23 @@ static bool set_target_expiration(struct kvm_lapic *apic) static void advance_periodic_target_expiration(struct kvm_lapic *apic) { - apic->lapic_timer.tscdeadline += - nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); + ktime_t now = ktime_get(); + u64 tscl = rdtsc(); + ktime_t delta; + + /* + * Synchronize both deadlines to the same time source or + * differences in the periods (caused by differences in the + * underlying clocks or numerical approximation errors) will + * cause the two to drift apart over time as the errors + * accumulate. + */ apic->lapic_timer.target_expiration = ktime_add_ns(apic->lapic_timer.target_expiration, apic->lapic_timer.period); + delta = ktime_sub(apic->lapic_timer.target_expiration, now); + apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + + nsec_to_cycles(apic->vcpu, delta); } static void start_sw_period(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f440d43..d594690 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3006,6 +3006,7 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct * { siginfo_t info; + clear_siginfo(&info); info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_MCEERR_AR; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d9305f1..695b0bd 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -49,7 +49,7 @@ #include <asm/debugreg.h> #include <asm/kvm_para.h> #include <asm/irq_remapping.h> -#include <asm/nospec-branch.h> +#include <asm/spec-ctrl.h> #include <asm/virtext.h> #include "trace.h" @@ -213,6 +213,12 @@ struct vcpu_svm { } host; u64 spec_ctrl; + /* + * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be + * translated into the appropriate L2_CFG bits on the host to + * perform speculative control. + */ + u64 virt_spec_ctrl; u32 *msrpm; @@ -2063,6 +2069,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.microcode_version = 0x01000065; svm->spec_ctrl = 0; + svm->virt_spec_ctrl = 0; if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | @@ -4111,11 +4118,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; msr_info->data = svm->spec_ctrl; break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) + return 1; + + msr_info->data = svm->virt_spec_ctrl; + break; case MSR_F15H_IC_CFG: { int family, model; @@ -4206,11 +4221,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) return 1; svm->spec_ctrl = data; @@ -4233,7 +4249,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBPB)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)) return 1; if (data & ~PRED_CMD_IBPB) @@ -4247,6 +4263,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) + return 1; + + if (data & ~SPEC_CTRL_SSBD) + return 1; + + svm->virt_spec_ctrl = data; + break; case MSR_STAR: svm->vmcb->save.star = data; break; @@ -5560,8 +5586,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - if (svm->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -5655,6 +5680,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, svm->host.gs_base); +#else + loadsegment(fs, svm->host.fs); +#ifndef CONFIG_X86_32_LAZY_GS + loadsegment(gs, svm->host.gs); +#endif +#endif + /* * We do not use IBRS in the kernel. If this vCPU has used the * SPEC_CTRL MSR it may have left it on; save the value and @@ -5673,20 +5710,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - if (svm->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); - - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); - -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, svm->host.gs_base); -#else - loadsegment(fs, svm->host.fs); -#ifndef CONFIG_X86_32_LAZY_GS - loadsegment(gs, svm->host.gs); -#endif -#endif + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); reload_tss(vcpu); @@ -5789,7 +5813,7 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_high_real_mode_segbase(void) +static bool svm_has_emulated_msr(int index) { return true; } @@ -7018,7 +7042,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, - .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase, + .has_emulated_msr = svm_has_emulated_msr, .vcpu_create = svm_create_vcpu, .vcpu_free = svm_free_vcpu, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 48989f7..fc61e25 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -51,7 +51,7 @@ #include <asm/apic.h> #include <asm/irq_remapping.h> #include <asm/mmu_context.h> -#include <asm/nospec-branch.h> +#include <asm/spec-ctrl.h> #include <asm/mshyperv.h> #include "trace.h" @@ -3741,7 +3741,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return kvm_get_msr_common(vcpu, msr_info); case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; @@ -3860,12 +3859,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) return 1; vmx->spec_ctrl = data; @@ -3891,7 +3889,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; @@ -9757,9 +9754,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) } STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); -static bool vmx_has_high_real_mode_segbase(void) +static bool vmx_has_emulated_msr(int index) { - return enable_unrestricted_guest || emulate_invalid_guest_state; + switch (index) { + case MSR_IA32_SMBASE: + /* + * We cannot do SMM unless we can run the guest in big + * real mode. + */ + return enable_unrestricted_guest || emulate_invalid_guest_state; + case MSR_AMD64_VIRT_SPEC_CTRL: + /* This is AMD only. */ + return false; + default: + return true; + } } static bool vmx_mpx_supported(void) @@ -9991,8 +10000,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - if (vmx->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); vmx->__launched = vmx->loaded_vmcs->launched; @@ -10140,8 +10148,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - if (vmx->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); + x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -12910,7 +12917,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .hardware_enable = hardware_enable, .hardware_disable = hardware_disable, .cpu_has_accelerated_tpr = report_flexpriority, - .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, + .has_emulated_msr = vmx_has_emulated_msr, .vm_init = vmx_vm_init, .vm_alloc = vmx_vm_alloc, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 06dd4cd..cc8c8be 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1066,6 +1066,7 @@ static u32 emulated_msrs[] = { MSR_SMI_COUNT, MSR_PLATFORM_INFO, MSR_MISC_FEATURES_ENABLES, + MSR_AMD64_VIRT_SPEC_CTRL, }; static unsigned num_emulated_msrs; @@ -2912,7 +2913,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * fringe case that is not enabled except via specific settings * of the module parameters. */ - r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); + r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE); break; case KVM_CAP_VAPIC: r = !kvm_x86_ops->cpu_has_accelerated_tpr(); @@ -4612,14 +4613,8 @@ static void kvm_init_msr_list(void) num_msrs_to_save = j; for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { - switch (emulated_msrs[i]) { - case MSR_IA32_SMBASE: - if (!kvm_x86_ops->cpu_has_high_real_mode_segbase()) - continue; - break; - default: - break; - } + if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) + continue; if (j < i) emulated_msrs[j] = emulated_msrs[i]; @@ -6705,11 +6700,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) unsigned long nr, a0, a1, a2, a3, ret; int op_64_bit; - if (kvm_hv_hypercall_enabled(vcpu->kvm)) { - if (!kvm_hv_hypercall(vcpu)) - return 0; - goto out; - } + if (kvm_hv_hypercall_enabled(vcpu->kvm)) + return kvm_hv_hypercall(vcpu); nr = kvm_register_read(vcpu, VCPU_REGS_RAX); a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); @@ -6730,7 +6722,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) if (kvm_x86_ops->get_cpl(vcpu) != 0) { ret = -KVM_EPERM; - goto out_error; + goto out; } switch (nr) { @@ -6750,12 +6742,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) ret = -KVM_ENOSYS; break; } -out_error: +out: if (!op_64_bit) ret = (u32)ret; kvm_register_write(vcpu, VCPU_REGS_RAX, ret); -out: ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); } @@ -8014,6 +8005,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct msr_data apic_base_msr; int mmu_reset_needed = 0; + int cpuid_update_needed = 0; int pending_vec, max_bits, idx; struct desc_ptr dt; int ret = -EINVAL; @@ -8052,8 +8044,10 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu->arch.cr0 = sregs->cr0; mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; + cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) & + (X86_CR4_OSXSAVE | X86_CR4_PKE)); kvm_x86_ops->set_cr4(vcpu, sregs->cr4); - if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE)) + if (cpuid_update_needed) kvm_update_cpuid(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); |