Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--  arch/x86/kvm/assigned-dev.c |  2
-rw-r--r--  arch/x86/kvm/lapic.c        | 11
-rw-r--r--  arch/x86/kvm/mmu.c          | 20
-rw-r--r--  arch/x86/kvm/vmx.c          | 12
-rw-r--r--  arch/x86/kvm/x86.c          | 43
5 files changed, 60 insertions, 28 deletions
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
index 6eb5c20..d090ecf 100644
--- a/arch/x86/kvm/assigned-dev.c
+++ b/arch/x86/kvm/assigned-dev.c
@@ -666,7 +666,7 @@ static int probe_sysfs_permissions(struct pci_dev *dev)
 		if (r)
 			return r;
 
-		inode = path.dentry->d_inode;
+		inode = d_backing_inode(path.dentry);
 
 		r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
 		path_put(&path);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d67206a..629af0f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -683,8 +683,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 	unsigned long bitmap = 1;
 	struct kvm_lapic **dst;
 	int i;
-	bool ret = false;
-	bool x2apic_ipi = src && apic_x2apic_mode(src);
+	bool ret, x2apic_ipi;
 
 	*r = -1;
 
@@ -696,16 +695,18 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 	if (irq->shorthand)
 		return false;
 
+	x2apic_ipi = src && apic_x2apic_mode(src);
 	if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
 		return false;
 
+	ret = true;
 	rcu_read_lock();
 	map = rcu_dereference(kvm->arch.apic_map);
 
-	if (!map)
+	if (!map) {
+		ret = false;
 		goto out;
-
-	ret = true;
+	}
 
 	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
 		if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 146f295..d43867c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4481,9 +4481,11 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
 		pfn = spte_to_pfn(*sptep);
 
 		/*
-		 * Only EPT supported for now; otherwise, one would need to
-		 * find out efficiently whether the guest page tables are
-		 * also using huge pages.
+		 * We cannot do huge page mapping for indirect shadow pages,
+		 * which are found on the last rmap (level = 1) when not using
+		 * tdp; such shadow pages are synced with the page table in
+		 * the guest, and the guest page table is using 4K page size
+		 * mapping if the indirect sp has level = 1.
 		 */
 		if (sp->role.direct &&
 			!kvm_is_reserved_pfn(pfn) &&
@@ -4504,19 +4506,12 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 	bool flush = false;
 	unsigned long *rmapp;
 	unsigned long last_index, index;
-	gfn_t gfn_start, gfn_end;
 
 	spin_lock(&kvm->mmu_lock);
 
-	gfn_start = memslot->base_gfn;
-	gfn_end = memslot->base_gfn + memslot->npages - 1;
-
-	if (gfn_start >= gfn_end)
-		goto out;
-
 	rmapp = memslot->arch.rmap[0];
-	last_index = gfn_to_index(gfn_end, memslot->base_gfn,
-					PT_PAGE_TABLE_LEVEL);
+	last_index = gfn_to_index(memslot->base_gfn + memslot->npages - 1,
+				memslot->base_gfn, PT_PAGE_TABLE_LEVEL);
 
 	for (index = 0; index <= last_index; ++index, ++rmapp) {
 		if (*rmapp)
@@ -4534,7 +4529,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 	if (flush)
 		kvm_flush_remote_tlbs(kvm);
 
-out:
 	spin_unlock(&kvm->mmu_lock);
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f5e8dce..f7b6168 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3622,8 +3622,16 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 
 static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-	unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ?
-		KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+	/*
+	 * Pass through host's Machine Check Enable value to hw_cr4, which
+	 * is in force while we are in guest mode.  Do not let guests control
+	 * this bit, even if host CR4.MCE == 0.
+	 */
+	unsigned long hw_cr4 =
+		(cr4_read_shadow() & X86_CR4_MCE) |
+		(cr4 & ~X86_CR4_MCE) |
+		(to_vmx(vcpu)->rmode.vm86_active ?
+		 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
 
 	if (cr4 & X86_CR4_VMXE) {
 		/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e1a8126..c73efcd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1669,12 +1669,28 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 		&guest_hv_clock, sizeof(guest_hv_clock))))
 		return 0;
 
-	/*
-	 * The interface expects us to write an even number signaling that the
-	 * update is finished. Since the guest won't see the intermediate
-	 * state, we just increase by 2 at the end.
+	/* This VCPU is paused, but it's legal for a guest to read another
+	 * VCPU's kvmclock, so we really have to follow the specification where
+	 * it says that version is odd if data is being modified, and even after
+	 * it is consistent.
+	 *
+	 * Version field updates must be kept separate.  This is because
+	 * kvm_write_guest_cached might use a "rep movs" instruction, and
+	 * writes within a string instruction are weakly ordered.  So there
+	 * are three writes overall.
+	 *
+	 * As a small optimization, only write the version field in the first
+	 * and third write.  The vcpu->pv_time cache is still valid, because the
+	 * version field is the first in the struct.
 	 */
-	vcpu->hv_clock.version = guest_hv_clock.version + 2;
+	BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+
+	vcpu->hv_clock.version = guest_hv_clock.version + 1;
+	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+				&vcpu->hv_clock,
+				sizeof(vcpu->hv_clock.version));
+
+	smp_wmb();
 
 	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
 	pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
@@ -1695,6 +1711,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
 				&vcpu->hv_clock,
 				sizeof(vcpu->hv_clock));
+
+	smp_wmb();
+
+	vcpu->hv_clock.version++;
+	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+				&vcpu->hv_clock,
+				sizeof(vcpu->hv_clock.version));
 	return 0;
 }
 
@@ -5799,7 +5822,6 @@ int kvm_arch_init(void *opaque)
 	kvm_set_mmio_spte_mask();
 
 	kvm_x86_ops = ops;
-	kvm_init_msr_list();
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
 			PT_DIRTY_MASK, PT64_NX_MASK, 0);
 
@@ -7253,7 +7275,14 @@ void kvm_arch_hardware_disable(void)
 
 int kvm_arch_hardware_setup(void)
 {
-	return kvm_x86_ops->hardware_setup();
+	int r;
+
+	r = kvm_x86_ops->hardware_setup();
+	if (r != 0)
+		return r;
+
+	kvm_init_msr_list();
+	return 0;
 }
 
 void kvm_arch_hardware_unsetup(void)
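
For the kvm_mmu_zap_collapsible_sptes hunk above, the new last_index expression folds the old gfn_end variable directly into gfn_to_index(). At the 4K page-table level that index is simply the gfn's offset from the slot base, so the loop still walks exactly npages rmap slots. Below is a small self-contained check of that arithmetic, using a simplified stand-in for gfn_to_index() rather than the real KVM helper:

#include <assert.h>
#include <stdint.h>

typedef uint64_t gfn_t;

/* Simplified 4K-level stand-in for gfn_to_index(): at PT_PAGE_TABLE_LEVEL
 * the rmap index is just the gfn's offset from the memslot base. */
static gfn_t gfn_to_index_4k(gfn_t gfn, gfn_t base_gfn)
{
	return gfn - base_gfn;
}

int main(void)
{
	gfn_t base_gfn = 0x100000, npages = 512;

	/* The hunk's new last_index expression names the final 4K rmap slot. */
	assert(gfn_to_index_4k(base_gfn + npages - 1, base_gfn) == npages - 1);
	return 0;
}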
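
The vmx_set_cr4 hunk composes hw_cr4 from three sources: the host's CR4.MCE bit, every other bit of the guest-requested cr4, and the mode-dependent always-on mask. The sketch below shows only that bit merge; make_hw_cr4 and its always_on parameter are hypothetical stand-ins, and the real function additionally validates VMXE and handles nested VMX.

#include <stdio.h>

#define X86_CR4_MCE	(1UL << 6)	/* Machine Check Enable, CR4 bit 6 */

/* Illustrative only: the host decides MCE, the guest keeps control of the
 * remaining bits, and the always-on mask is ORed in unconditionally. */
static unsigned long make_hw_cr4(unsigned long host_cr4,
				 unsigned long guest_cr4,
				 unsigned long always_on)
{
	return (host_cr4 & X86_CR4_MCE) |	/* pass host CR4.MCE through */
	       (guest_cr4 & ~X86_CR4_MCE) |	/* everything else from the guest */
	       always_on;			/* mode-dependent fixed bits */
}

int main(void)
{
	/* Host has MCE set; a guest write that clears it still yields MCE = 1. */
	printf("%#lx\n", make_hw_cr4(X86_CR4_MCE, 0x0UL, 0x0UL));
	return 0;
}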
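
The kvm_guest_time_update changes are easiest to read next to the consumer side of the protocol: a guest reading kvmclock samples the version field before and after copying the data and retries while the version is odd or has changed in between, which is why the host must bump the version to an odd value, write the payload, and only then make it even again, with the three writes kept separate. Below is a minimal, self-contained sketch of that reader loop; the struct and function names are illustrative rather than the real pvclock ABI, and actual guest code uses proper read barriers instead of the compiler builtin used here.

#include <stdint.h>

/* Simplified stand-in for the shared clock page; not the real layout. */
struct pvclock_sketch {
	volatile uint32_t version;	/* odd while the host is mid-update */
	uint64_t system_time;		/* payload the guest wants to read */
};

static uint64_t read_system_time(const struct pvclock_sketch *src)
{
	uint32_t before, after;
	uint64_t time;

	do {
		before = src->version;		/* first version read */
		__sync_synchronize();		/* order version read before data read */
		time = src->system_time;	/* copy the payload */
		__sync_synchronize();		/* order data read before re-check */
		after = src->version;		/* second version read */
	} while ((before & 1) || before != after);	/* retry on in-flight or torn update */

	return time;
}

int main(void)
{
	struct pvclock_sketch clock = { .version = 2, .system_time = 123456789ULL };

	return read_system_time(&clock) == 123456789ULL ? 0 : 1;
}

Read against this loop, the hunk's three separate writes are exactly what keeps a concurrent reader from ever pairing an even version with half-updated data.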