diff options
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r-- | arch/x86/kvm/svm.c | 401 |
1 files changed, 304 insertions, 97 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b3e488a..b58787d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -131,6 +131,28 @@ static const u32 host_save_user_msrs[] = { #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) +struct kvm_sev_info { + bool active; /* SEV enabled guest */ + unsigned int asid; /* ASID used for this guest */ + unsigned int handle; /* SEV firmware handle */ + int fd; /* SEV device fd */ + unsigned long pages_locked; /* Number of pages locked */ + struct list_head regions_list; /* List of registered regions */ +}; + +struct kvm_svm { + struct kvm kvm; + + /* Struct members for AVIC */ + u32 avic_vm_id; + u32 ldr_mode; + struct page *avic_logical_id_table_page; + struct page *avic_physical_id_table_page; + struct hlist_node hnode; + + struct kvm_sev_info sev_info; +}; + struct kvm_vcpu; struct nested_state { @@ -178,6 +200,8 @@ struct vcpu_svm { uint64_t sysenter_eip; uint64_t tsc_aux; + u64 msr_decfg; + u64 next_rip; u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; @@ -274,6 +298,54 @@ static bool npt_enabled = true; static bool npt_enabled; #endif +/* + * These 2 parameters are used to config the controls for Pause-Loop Exiting: + * pause_filter_count: On processors that support Pause filtering(indicated + * by CPUID Fn8000_000A_EDX), the VMCB provides a 16 bit pause filter + * count value. On VMRUN this value is loaded into an internal counter. + * Each time a pause instruction is executed, this counter is decremented + * until it reaches zero at which time a #VMEXIT is generated if pause + * intercept is enabled. Refer to AMD APM Vol 2 Section 15.14.4 Pause + * Intercept Filtering for more details. + * This also indicate if ple logic enabled. + * + * pause_filter_thresh: In addition, some processor families support advanced + * pause filtering (indicated by CPUID Fn8000_000A_EDX) upper bound on + * the amount of time a guest is allowed to execute in a pause loop. + * In this mode, a 16-bit pause filter threshold field is added in the + * VMCB. The threshold value is a cycle count that is used to reset the + * pause counter. As with simple pause filtering, VMRUN loads the pause + * count value from VMCB into an internal counter. Then, on each pause + * instruction the hardware checks the elapsed number of cycles since + * the most recent pause instruction against the pause filter threshold. + * If the elapsed cycle count is greater than the pause filter threshold, + * then the internal pause count is reloaded from the VMCB and execution + * continues. If the elapsed cycle count is less than the pause filter + * threshold, then the internal pause count is decremented. If the count + * value is less than zero and PAUSE intercept is enabled, a #VMEXIT is + * triggered. If advanced pause filtering is supported and pause filter + * threshold field is set to zero, the filter will operate in the simpler, + * count only mode. + */ + +static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP; +module_param(pause_filter_thresh, ushort, 0444); + +static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW; +module_param(pause_filter_count, ushort, 0444); + +/* Default doubles per-vcpu window every exit. */ +static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW; +module_param(pause_filter_count_grow, ushort, 0444); + +/* Default resets per-vcpu window every exit to pause_filter_count. */ +static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; +module_param(pause_filter_count_shrink, ushort, 0444); + +/* Default is to compute the maximum so we can never overflow. */ +static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX; +module_param(pause_filter_count_max, ushort, 0444); + /* allow nested paging (virtualized MMU) for all guests */ static int npt = true; module_param(npt, int, S_IRUGO); @@ -300,6 +372,8 @@ module_param(vgif, int, 0444); static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); module_param(sev, int, 0444); +static u8 rsm_ins_bytes[] = "\x0f\xaa"; + static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); static void svm_complete_interrupts(struct vcpu_svm *svm); @@ -348,6 +422,12 @@ struct enc_region { unsigned long size; }; + +static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) +{ + return container_of(kvm, struct kvm_svm, kvm); +} + static inline bool svm_sev_enabled(void) { return max_sev_asid; @@ -355,14 +435,14 @@ static inline bool svm_sev_enabled(void) static inline bool sev_guest(struct kvm *kvm) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return sev->active; } static inline int sev_get_asid(struct kvm *kvm) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return sev->asid; } @@ -1079,7 +1159,7 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm) } /* Note: - * This hash table is used to map VM_ID to a struct kvm_arch, + * This hash table is used to map VM_ID to a struct kvm_svm, * when handling AMD IOMMU GALOG notification to schedule in * a particular vCPU. */ @@ -1096,7 +1176,7 @@ static DEFINE_SPINLOCK(svm_vm_data_hash_lock); static int avic_ga_log_notifier(u32 ga_tag) { unsigned long flags; - struct kvm_arch *ka = NULL; + struct kvm_svm *kvm_svm; struct kvm_vcpu *vcpu = NULL; u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag); u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag); @@ -1104,13 +1184,10 @@ static int avic_ga_log_notifier(u32 ga_tag) pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id); spin_lock_irqsave(&svm_vm_data_hash_lock, flags); - hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) { - struct kvm *kvm = container_of(ka, struct kvm, arch); - struct kvm_arch *vm_data = &kvm->arch; - - if (vm_data->avic_vm_id != vm_id) + hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) { + if (kvm_svm->avic_vm_id != vm_id) continue; - vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); + vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id); break; } spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); @@ -1168,6 +1245,42 @@ err: return rc; } +static void grow_ple_window(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_control_area *control = &svm->vmcb->control; + int old = control->pause_filter_count; + + control->pause_filter_count = __grow_ple_window(old, + pause_filter_count, + pause_filter_count_grow, + pause_filter_count_max); + + if (control->pause_filter_count != old) + mark_dirty(svm->vmcb, VMCB_INTERCEPTS); + + trace_kvm_ple_window_grow(vcpu->vcpu_id, + control->pause_filter_count, old); +} + +static void shrink_ple_window(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_control_area *control = &svm->vmcb->control; + int old = control->pause_filter_count; + + control->pause_filter_count = + __shrink_ple_window(old, + pause_filter_count, + pause_filter_count_shrink, + pause_filter_count); + if (control->pause_filter_count != old) + mark_dirty(svm->vmcb, VMCB_INTERCEPTS); + + trace_kvm_ple_window_shrink(vcpu->vcpu_id, + control->pause_filter_count, old); +} + static __init int svm_hardware_setup(void) { int cpu; @@ -1198,6 +1311,14 @@ static __init int svm_hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 32; } + /* Check for pause filtering support */ + if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { + pause_filter_count = 0; + pause_filter_thresh = 0; + } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) { + pause_filter_thresh = 0; + } + if (nested) { printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); @@ -1324,10 +1445,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) static void avic_init_vmcb(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb; - struct kvm_arch *vm_data = &svm->vcpu.kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm); phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page)); - phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic_logical_id_table_page)); - phys_addr_t ppa = __sme_set(page_to_phys(vm_data->avic_physical_id_table_page)); + phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page)); + phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page)); vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK; vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; @@ -1359,6 +1480,14 @@ static void init_vmcb(struct vcpu_svm *svm) set_exception_intercept(svm, MC_VECTOR); set_exception_intercept(svm, AC_VECTOR); set_exception_intercept(svm, DB_VECTOR); + /* + * Guest access to VMware backdoor ports could legitimately + * trigger #GP because of TSS I/O permission bitmap. + * We intercept those #GP and allow access to them anyway + * as VMware does. + */ + if (enable_vmware_backdoor) + set_exception_intercept(svm, GP_VECTOR); set_intercept(svm, INTERCEPT_INTR); set_intercept(svm, INTERCEPT_NMI); @@ -1367,7 +1496,6 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_RDPMC); set_intercept(svm, INTERCEPT_CPUID); set_intercept(svm, INTERCEPT_INVD); - set_intercept(svm, INTERCEPT_HLT); set_intercept(svm, INTERCEPT_INVLPG); set_intercept(svm, INTERCEPT_INVLPGA); set_intercept(svm, INTERCEPT_IOIO_PROT); @@ -1383,12 +1511,16 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_SKINIT); set_intercept(svm, INTERCEPT_WBINVD); set_intercept(svm, INTERCEPT_XSETBV); + set_intercept(svm, INTERCEPT_RSM); - if (!kvm_mwait_in_guest()) { + if (!kvm_mwait_in_guest(svm->vcpu.kvm)) { set_intercept(svm, INTERCEPT_MONITOR); set_intercept(svm, INTERCEPT_MWAIT); } + if (!kvm_hlt_in_guest(svm->vcpu.kvm)) + set_intercept(svm, INTERCEPT_HLT); + control->iopm_base_pa = __sme_set(iopm_base); control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); control->int_ctl = V_INTR_MASKING_MASK; @@ -1444,9 +1576,13 @@ static void init_vmcb(struct vcpu_svm *svm) svm->nested.vmcb = 0; svm->vcpu.arch.hflags = 0; - if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { - control->pause_filter_count = 3000; + if (pause_filter_count) { + control->pause_filter_count = pause_filter_count; + if (pause_filter_thresh) + control->pause_filter_thresh = pause_filter_thresh; set_intercept(svm, INTERCEPT_PAUSE); + } else { + clr_intercept(svm, INTERCEPT_PAUSE); } if (kvm_vcpu_apicv_active(&svm->vcpu)) @@ -1483,12 +1619,12 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, unsigned int index) { u64 *avic_physical_id_table; - struct kvm_arch *vm_data = &vcpu->kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); if (index >= AVIC_MAX_PHYSICAL_ID_COUNT) return NULL; - avic_physical_id_table = page_address(vm_data->avic_physical_id_table_page); + avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page); return &avic_physical_id_table[index]; } @@ -1571,7 +1707,7 @@ static void __sev_asid_free(int asid) static void sev_asid_free(struct kvm *kvm) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; __sev_asid_free(sev->asid); } @@ -1611,7 +1747,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, unsigned long ulen, unsigned long *n, int write) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; unsigned long npages, npinned, size; unsigned long locked, lock_limit; struct page **pages; @@ -1662,7 +1798,7 @@ err: static void sev_unpin_memory(struct kvm *kvm, struct page **pages, unsigned long npages) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; release_pages(pages, npages); kvfree(pages); @@ -1700,9 +1836,20 @@ static void __unregister_enc_region_locked(struct kvm *kvm, kfree(region); } +static struct kvm *svm_vm_alloc(void) +{ + struct kvm_svm *kvm_svm = kzalloc(sizeof(struct kvm_svm), GFP_KERNEL); + return &kvm_svm->kvm; +} + +static void svm_vm_free(struct kvm *kvm) +{ + kfree(to_kvm_svm(kvm)); +} + static void sev_vm_destroy(struct kvm *kvm) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct list_head *head = &sev->regions_list; struct list_head *pos, *q; @@ -1731,18 +1878,18 @@ static void sev_vm_destroy(struct kvm *kvm) static void avic_vm_destroy(struct kvm *kvm) { unsigned long flags; - struct kvm_arch *vm_data = &kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(kvm); if (!avic) return; - if (vm_data->avic_logical_id_table_page) - __free_page(vm_data->avic_logical_id_table_page); - if (vm_data->avic_physical_id_table_page) - __free_page(vm_data->avic_physical_id_table_page); + if (kvm_svm->avic_logical_id_table_page) + __free_page(kvm_svm->avic_logical_id_table_page); + if (kvm_svm->avic_physical_id_table_page) + __free_page(kvm_svm->avic_physical_id_table_page); spin_lock_irqsave(&svm_vm_data_hash_lock, flags); - hash_del(&vm_data->hnode); + hash_del(&kvm_svm->hnode); spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); } @@ -1756,10 +1903,10 @@ static int avic_vm_init(struct kvm *kvm) { unsigned long flags; int err = -ENOMEM; - struct kvm_arch *vm_data = &kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(kvm); + struct kvm_svm *k2; struct page *p_page; struct page *l_page; - struct kvm_arch *ka; u32 vm_id; if (!avic) @@ -1770,7 +1917,7 @@ static int avic_vm_init(struct kvm *kvm) if (!p_page) goto free_avic; - vm_data->avic_physical_id_table_page = p_page; + kvm_svm->avic_physical_id_table_page = p_page; clear_page(page_address(p_page)); /* Allocating logical APIC ID table (4KB) */ @@ -1778,7 +1925,7 @@ static int avic_vm_init(struct kvm *kvm) if (!l_page) goto free_avic; - vm_data->avic_logical_id_table_page = l_page; + kvm_svm->avic_logical_id_table_page = l_page; clear_page(page_address(l_page)); spin_lock_irqsave(&svm_vm_data_hash_lock, flags); @@ -1790,15 +1937,13 @@ static int avic_vm_init(struct kvm *kvm) } /* Is it still in use? Only possible if wrapped at least once */ if (next_vm_id_wrapped) { - hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) { - struct kvm *k2 = container_of(ka, struct kvm, arch); - struct kvm_arch *vd2 = &k2->arch; - if (vd2->avic_vm_id == vm_id) + hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) { + if (k2->avic_vm_id == vm_id) goto again; } } - vm_data->avic_vm_id = vm_id; - hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id); + kvm_svm->avic_vm_id = vm_id; + hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id); spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); return 0; @@ -1902,6 +2047,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; + vcpu->arch.microcode_version = 0x01000065; svm->spec_ctrl = 0; if (!init_event) { @@ -2529,14 +2675,7 @@ static int bp_interception(struct vcpu_svm *svm) static int ud_interception(struct vcpu_svm *svm) { - int er; - - er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); - if (er == EMULATE_USER_EXIT) - return 0; - if (er != EMULATE_DONE) - kvm_queue_exception(&svm->vcpu, UD_VECTOR); - return 1; + return handle_ud(&svm->vcpu); } static int ac_interception(struct vcpu_svm *svm) @@ -2545,6 +2684,23 @@ static int ac_interception(struct vcpu_svm *svm) return 1; } +static int gp_interception(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + u32 error_code = svm->vmcb->control.exit_info_1; + int er; + + WARN_ON_ONCE(!enable_vmware_backdoor); + + er = emulate_instruction(vcpu, + EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); + if (er == EMULATE_USER_EXIT) + return 0; + else if (er != EMULATE_DONE) + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + return 1; +} + static bool is_erratum_383(void) { int err, i; @@ -2633,7 +2789,7 @@ static int io_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ - int size, in, string, ret; + int size, in, string; unsigned port; ++svm->vcpu.stat.io_exits; @@ -2645,16 +2801,8 @@ static int io_interception(struct vcpu_svm *svm) port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; svm->next_rip = svm->vmcb->control.exit_info_2; - ret = kvm_skip_emulated_instruction(&svm->vcpu); - /* - * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered - * KVM_EXIT_DEBUG here. - */ - if (in) - return kvm_fast_pio_in(vcpu, size, port) && ret; - else - return kvm_fast_pio_out(vcpu, size, port) && ret; + return kvm_fast_pio(&svm->vcpu, size, port, in); } static int nmi_interception(struct vcpu_svm *svm) @@ -3699,6 +3847,12 @@ static int emulate_on_interception(struct vcpu_svm *svm) return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } +static int rsm_interception(struct vcpu_svm *svm) +{ + return x86_emulate_instruction(&svm->vcpu, 0, 0, + rsm_ins_bytes, 2) == EMULATE_DONE; +} + static int rdpmc_interception(struct vcpu_svm *svm) { int err; @@ -3860,6 +4014,22 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } +static int svm_get_msr_feature(struct kvm_msr_entry *msr) +{ + msr->data = 0; + + switch (msr->index) { + case MSR_F10H_DECFG: + if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) + msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; + break; + default: + return 1; + } + + return 0; +} + static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3935,9 +4105,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->spec_ctrl; break; - case MSR_IA32_UCODE_REV: - msr_info->data = 0x01000065; - break; case MSR_F15H_IC_CFG: { int family, model; @@ -3955,6 +4122,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x1E; } break; + case MSR_F10H_DECFG: + msr_info->data = svm->msr_decfg; + break; default: return kvm_get_msr_common(vcpu, msr_info); } @@ -4133,6 +4303,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; + case MSR_F10H_DECFG: { + struct kvm_msr_entry msr_entry; + + msr_entry.index = msr->index; + if (svm_get_msr_feature(&msr_entry)) + return 1; + + /* Check the supported bits */ + if (data & ~msr_entry.data) + return 1; + + /* Don't allow the guest to change a bit, #GP */ + if (!msr->host_initiated && (data ^ msr_entry.data)) + return 1; + + svm->msr_decfg = data; + break; + } case MSR_IA32_APICBASE: if (kvm_vcpu_apicv_active(vcpu)) avic_update_vapic_bar(to_svm(vcpu), data); @@ -4187,6 +4375,9 @@ static int pause_interception(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; bool in_kernel = (svm_get_cpl(vcpu) == 0); + if (pause_filter_thresh) + grow_ple_window(vcpu); + kvm_vcpu_on_spin(vcpu, in_kernel); return 1; } @@ -4277,7 +4468,7 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) { - struct kvm_arch *vm_data = &vcpu->kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); int index; u32 *logical_apic_id_table; int dlid = GET_APIC_LOGICAL_ID(ldr); @@ -4299,7 +4490,7 @@ static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) index = (cluster << 2) + apic; } - logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page); + logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page); return &logical_apic_id_table[index]; } @@ -4379,7 +4570,7 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct kvm_arch *vm_data = &vcpu->kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR); u32 mod = (dfr >> 28) & 0xf; @@ -4388,11 +4579,11 @@ static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) * If this changes, we need to flush the AVIC logical * APID id table. */ - if (vm_data->ldr_mode == mod) + if (kvm_svm->ldr_mode == mod) return 0; - clear_page(page_address(vm_data->avic_logical_id_table_page)); - vm_data->ldr_mode = mod; + clear_page(page_address(kvm_svm->avic_logical_id_table_page)); + kvm_svm->ldr_mode = mod; if (svm->ldr_reg) avic_handle_ldr_update(vcpu); @@ -4512,6 +4703,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, + [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception, [SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_SMI] = nop_on_interception, @@ -4541,7 +4733,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, [SVM_EXIT_NPF] = npf_interception, - [SVM_EXIT_RSM] = emulate_on_interception, + [SVM_EXIT_RSM] = rsm_interception, [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, }; @@ -4560,6 +4752,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); pr_err("%-20s%016llx\n", "intercepts:", control->intercept); pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); + pr_err("%-20s%d\n", "pause filter threshold:", + control->pause_filter_thresh); pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); @@ -5027,7 +5221,7 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, /* Try to enable guest_mode in IRTE */ pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK); - pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id, + pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, svm->vcpu.vcpu_id); pi.is_guest_mode = true; pi.vcpu_data = &vcpu_info; @@ -5191,6 +5385,11 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) return 0; } +static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) +{ + return 0; +} + static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5355,7 +5554,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * being speculatively taken. */ if (svm->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -5464,11 +5663,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) - rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (svm->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, 0); + native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -5492,14 +5691,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) - kvm_before_handle_nmi(&svm->vcpu); + kvm_before_interrupt(&svm->vcpu); stgi(); /* Any pending NMI will happen here */ if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) - kvm_after_handle_nmi(&svm->vcpu); + kvm_after_interrupt(&svm->vcpu); sync_cr8_to_lapic(vcpu); @@ -5875,6 +6074,8 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu) static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) { + if (pause_filter_thresh) + shrink_ple_window(vcpu); } static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) @@ -5991,7 +6192,7 @@ static int sev_asid_new(void) static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; int asid, ret; ret = -EBUSY; @@ -6056,14 +6257,14 @@ static int __sev_issue_cmd(int fd, int id, void *data, int *error) static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return __sev_issue_cmd(sev->fd, id, data, error); } static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_launch_start *start; struct kvm_sev_launch_start params; void *dh_blob, *session_blob; @@ -6161,7 +6362,7 @@ static int get_num_contig_pages(int idx, struct page **inpages, static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) { unsigned long vaddr, vaddr_end, next_vaddr, npages, size; - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct kvm_sev_launch_update_data params; struct sev_data_launch_update_data *data; struct page **inpages; @@ -6236,16 +6437,18 @@ e_free: static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + void __user *measure = (void __user *)(uintptr_t)argp->data; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_launch_measure *data; struct kvm_sev_launch_measure params; + void __user *p = NULL; void *blob = NULL; int ret; if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + if (copy_from_user(¶ms, measure, sizeof(params))) return -EFAULT; data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -6256,17 +6459,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!params.len) goto cmd; - if (params.uaddr) { + p = (void __user *)(uintptr_t)params.uaddr; + if (p) { if (params.len > SEV_FW_BLOB_MAX_SIZE) { ret = -EINVAL; goto e_free; } - if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) { - ret = -EFAULT; - goto e_free; - } - ret = -ENOMEM; blob = kmalloc(params.len, GFP_KERNEL); if (!blob) @@ -6290,13 +6489,13 @@ cmd: goto e_free_blob; if (blob) { - if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len)) + if (copy_to_user(p, blob, params.len)) ret = -EFAULT; } done: params.len = data->len; - if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) + if (copy_to_user(measure, ¶ms, sizeof(params))) ret = -EFAULT; e_free_blob: kfree(blob); @@ -6307,7 +6506,7 @@ e_free: static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_launch_finish *data; int ret; @@ -6327,7 +6526,7 @@ static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct kvm_sev_guest_status params; struct sev_data_guest_status *data; int ret; @@ -6359,7 +6558,7 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, unsigned long dst, int size, int *error, bool enc) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_dbg *data; int ret; @@ -6591,13 +6790,13 @@ err: static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_launch_secret *data; struct kvm_sev_launch_secret params; struct page **pages; void *blob, *hdr; unsigned long n; - int ret; + int ret, offset; if (!sev_guest(kvm)) return -ENOTTY; @@ -6623,6 +6822,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!data) goto e_unpin_memory; + offset = params.guest_uaddr & (PAGE_SIZE - 1); + data->guest_address = __sme_page_pa(pages[0]) + offset; + data->guest_len = params.guest_len; + blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len); if (IS_ERR(blob)) { ret = PTR_ERR(blob); @@ -6637,8 +6840,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) ret = PTR_ERR(hdr); goto e_free_blob; } - data->trans_address = __psp_pa(blob); - data->trans_len = params.trans_len; + data->hdr_address = __psp_pa(hdr); + data->hdr_len = params.hdr_len; data->handle = sev->handle; ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error); @@ -6711,7 +6914,7 @@ out: static int svm_register_enc_region(struct kvm *kvm, struct kvm_enc_region *range) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct enc_region *region; int ret = 0; @@ -6753,7 +6956,7 @@ e_free: static struct enc_region * find_enc_region(struct kvm *kvm, struct kvm_enc_region *range) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct list_head *head = &sev->regions_list; struct enc_region *i; @@ -6811,6 +7014,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .vcpu_free = svm_free_vcpu, .vcpu_reset = svm_vcpu_reset, + .vm_alloc = svm_vm_alloc, + .vm_free = svm_vm_free, .vm_init = avic_vm_init, .vm_destroy = svm_vm_destroy, @@ -6821,6 +7026,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .vcpu_unblocking = svm_vcpu_unblocking, .update_bp_intercept = update_bp_intercept, + .get_msr_feature = svm_get_msr_feature, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, @@ -6876,6 +7082,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .apicv_post_state_restore = avic_post_state_restore, .set_tss_addr = svm_set_tss_addr, + .set_identity_map_addr = svm_set_identity_map_addr, .get_tdp_level = get_npt_level, .get_mt_mask = svm_get_mt_mask, |