summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/virtual/kvm/api.txt66
-rw-r--r--Documentation/virtual/kvm/mmu.txt6
-rw-r--r--arch/arm/kvm/mmu.c10
-rw-r--r--arch/mips/include/asm/kvm_host.h2
-rw-r--r--arch/mips/kvm/mips.c9
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h2
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h14
-rw-r--r--arch/powerpc/kvm/book3s.c9
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c20
-rw-r--r--arch/powerpc/kvm/book3s_pr.c11
-rw-r--r--arch/powerpc/kvm/booke.c7
-rw-r--r--arch/powerpc/kvm/powerpc.c7
-rw-r--r--arch/s390/include/asm/kvm_host.h2
-rw-r--r--arch/s390/kvm/kvm-s390.c20
-rw-r--r--arch/x86/include/asm/kvm_emulate.h9
-rw-r--r--arch/x86/include/asm/kvm_host.h48
-rw-r--r--arch/x86/include/asm/pvclock-abi.h1
-rw-r--r--arch/x86/include/uapi/asm/kvm.h11
-rw-r--r--arch/x86/kernel/kvm.c4
-rw-r--r--arch/x86/kernel/kvmclock.c14
-rw-r--r--arch/x86/kvm/Kconfig9
-rw-r--r--arch/x86/kvm/cpuid.c11
-rw-r--r--arch/x86/kvm/cpuid.h16
-rw-r--r--arch/x86/kvm/emulate.c262
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h5
-rw-r--r--arch/x86/kvm/lapic.c33
-rw-r--r--arch/x86/kvm/lapic.h7
-rw-r--r--arch/x86/kvm/mmu.c209
-rw-r--r--arch/x86/kvm/mmu_audit.c16
-rw-r--r--arch/x86/kvm/paging_tmpl.h18
-rw-r--r--arch/x86/kvm/svm.c74
-rw-r--r--arch/x86/kvm/trace.h22
-rw-r--r--arch/x86/kvm/vmx.c105
-rw-r--r--arch/x86/kvm/x86.c616
-rw-r--r--include/linux/kvm_host.h64
-rw-r--r--include/linux/kvm_types.h1
-rw-r--r--include/uapi/linux/kvm.h6
-rw-r--r--virt/kvm/kvm_main.c414
40 files changed, 1655 insertions, 509 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 6955444..a7926a9 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -254,6 +254,11 @@ since the last call to this ioctl. Bit 0 is the first page in the
memory slot. Ensure the entire structure is cleared to avoid padding
issues.
+If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies
+the address space for which you want to return the dirty bitmap.
+They must be less than the value that KVM_CHECK_EXTENSION returns for
+the KVM_CAP_MULTI_ADDRESS_SPACE capability.
+
4.9 KVM_SET_MEMORY_ALIAS
@@ -820,11 +825,21 @@ struct kvm_vcpu_events {
} nmi;
__u32 sipi_vector;
__u32 flags;
+ struct {
+ __u8 smm;
+ __u8 pending;
+ __u8 smm_inside_nmi;
+ __u8 latched_init;
+ } smi;
};
-KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that
-interrupt.shadow contains a valid state. Otherwise, this field is undefined.
+Only two fields are defined in the flags field:
+
+- KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that
+ interrupt.shadow contains a valid state.
+- KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that
+ smi contains a valid state.
4.32 KVM_SET_VCPU_EVENTS
@@ -841,17 +856,20 @@ vcpu.
See KVM_GET_VCPU_EVENTS for the data structure.
Fields that may be modified asynchronously by running VCPUs can be excluded
-from the update. These fields are nmi.pending and sipi_vector. Keep the
-corresponding bits in the flags field cleared to suppress overwriting the
-current in-kernel state. The bits are:
+from the update. These fields are nmi.pending, sipi_vector, smi.smm,
+smi.pending. Keep the corresponding bits in the flags field cleared to
+suppress overwriting the current in-kernel state. The bits are:
KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel
KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector
+KVM_VCPUEVENT_VALID_SMM - transfer the smi sub-struct.
If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in
the flags field to signal that interrupt.shadow contains a valid state and
shall be written into the VCPU.
+KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available.
+
4.33 KVM_GET_DEBUGREGS
@@ -911,6 +929,13 @@ slot. When changing an existing slot, it may be moved in the guest
physical memory space, or its flags may be modified. It may not be
resized. Slots may not overlap in guest physical address space.
+If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
+specifies the address space which is being modified. They must be
+less than the value that KVM_CHECK_EXTENSION returns for the
+KVM_CAP_MULTI_ADDRESS_SPACE capability. Slots in separate address spaces
+are unrelated; the restriction on overlapping slots only applies within
+each address space.
+
Memory for the region is taken starting at the address denoted by the
field userspace_addr, which must point at user addressable memory for
the entire memory slot size. Any object may back this memory, including
@@ -1269,7 +1294,7 @@ The flags bitmap is defined as:
/* the host supports the ePAPR idle hcall
#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
-4.48 KVM_ASSIGN_PCI_DEVICE
+4.48 KVM_ASSIGN_PCI_DEVICE (deprecated)
Capability: none
Architectures: x86
@@ -1319,7 +1344,7 @@ Errors:
have their standard meanings.
-4.49 KVM_DEASSIGN_PCI_DEVICE
+4.49 KVM_DEASSIGN_PCI_DEVICE (deprecated)
Capability: none
Architectures: x86
@@ -1338,7 +1363,7 @@ Errors:
Other error conditions may be defined by individual device types or
have their standard meanings.
-4.50 KVM_ASSIGN_DEV_IRQ
+4.50 KVM_ASSIGN_DEV_IRQ (deprecated)
Capability: KVM_CAP_ASSIGN_DEV_IRQ
Architectures: x86
@@ -1378,7 +1403,7 @@ Errors:
have their standard meanings.
-4.51 KVM_DEASSIGN_DEV_IRQ
+4.51 KVM_DEASSIGN_DEV_IRQ (deprecated)
Capability: KVM_CAP_ASSIGN_DEV_IRQ
Architectures: x86
@@ -1452,7 +1477,7 @@ struct kvm_irq_routing_s390_adapter {
};
-4.53 KVM_ASSIGN_SET_MSIX_NR
+4.53 KVM_ASSIGN_SET_MSIX_NR (deprecated)
Capability: none
Architectures: x86
@@ -1474,7 +1499,7 @@ struct kvm_assigned_msix_nr {
#define KVM_MAX_MSIX_PER_DEV 256
-4.54 KVM_ASSIGN_SET_MSIX_ENTRY
+4.54 KVM_ASSIGN_SET_MSIX_ENTRY (deprecated)
Capability: none
Architectures: x86
@@ -1630,7 +1655,7 @@ should skip processing the bitmap and just invalidate everything. It must
be set to the number of set bits in the bitmap.
-4.61 KVM_ASSIGN_SET_INTX_MASK
+4.61 KVM_ASSIGN_SET_INTX_MASK (deprecated)
Capability: KVM_CAP_PCI_2_3
Architectures: x86
@@ -2979,6 +3004,16 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
which is the maximum number of possibly pending cpu-local interrupts.
+4.90 KVM_SMI
+
+Capability: KVM_CAP_X86_SMM
+Architectures: x86
+Type: vcpu ioctl
+Parameters: none
+Returns: 0 on success, -1 on error
+
+Queues an SMI on the thread's vcpu.
+
5. The kvm_run structure
------------------------
@@ -3014,7 +3049,12 @@ an interrupt can be injected now with KVM_INTERRUPT.
The value of the current interrupt flag. Only valid if in-kernel
local APIC is not used.
- __u8 padding2[2];
+ __u16 flags;
+
+More architecture-specific flags detailing state of the VCPU that may
+affect the device's behavior. The only currently defined flag is
+KVM_RUN_X86_SMM, which is valid on x86 machines and is set if the
+VCPU is in system management mode.
/* in (pre_kvm_run), out (post_kvm_run) */
__u64 cr8;
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index c59bd9b..3a4d681 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -173,6 +173,12 @@ Shadow pages contain the following information:
Contains the value of cr4.smap && !cr0.wp for which the page is valid
(pages for which this is true are different from other pages; see the
treatment of cr0.wp=0 below).
+ role.smm:
+ Is 1 if the page is valid in system management mode. This field
+ determines which of the kvm_memslots array was used to build this
+ shadow page; it is also used to go back from a struct kvm_mmu_page
+ to a memslot, through the kvm_memslots_for_spte_role macro and
+ __gfn_to_memslot.
gfn:
Either the guest page table containing the translations shadowed by this
page, or the base page frame for linear translations. See role.direct.
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index ce0bce4..7b42012 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1155,7 +1155,8 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
*/
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
{
- struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot);
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
@@ -1718,8 +1719,9 @@ out:
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
/*
@@ -1733,7 +1735,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
hva_t hva = mem->userspace_addr;
@@ -1838,7 +1840,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
return 0;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
+void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
{
}
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 4c25823..e8c8d9d 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -839,7 +839,7 @@ static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {}
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index a8e660a..cd4c129 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -198,15 +198,16 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
unsigned long npages = 0;
@@ -968,6 +969,7 @@ out:
/* Get (and clear) the dirty memory log for a memory slot. */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
+ struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
unsigned long ga, ga_end;
int is_dirty = 0;
@@ -982,7 +984,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
/* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) {
- memslot = id_to_memslot(kvm->memslots, log->slot);
+ slots = kvm_memslots(kvm);
+ memslot = id_to_memslot(slots, log->slot);
ga = memslot->base_gfn << PAGE_SHIFT;
ga_end = ga + (memslot->npages << PAGE_SHIFT);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 3536d12..2aa79c8 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -430,7 +430,7 @@ static inline void note_hpte_modification(struct kvm *kvm,
*/
static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
{
- return rcu_dereference_raw_notrace(kvm->memslots);
+ return rcu_dereference_raw_notrace(kvm->memslots[0]);
}
extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index a193a13..d91f65b 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -698,7 +698,7 @@ struct kvm_vcpu_arch {
static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_exit(void) {}
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index b8475da..c6ef05b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -182,10 +182,11 @@ extern int kvmppc_core_create_memslot(struct kvm *kvm,
unsigned long npages);
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem);
+ const struct kvm_userspace_memory_region *mem);
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old);
+ const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new);
extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
struct kvm_ppc_smmu_info *info);
extern void kvmppc_core_flush_memslot(struct kvm *kvm,
@@ -243,10 +244,11 @@ struct kvmppc_ops {
void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot);
int (*prepare_memory_region)(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem);
+ const struct kvm_userspace_memory_region *mem);
void (*commit_memory_region)(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old);
+ const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new);
int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
unsigned long end);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 453a8a4..05ea8fc 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -757,16 +757,17 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem)
+ const struct kvm_userspace_memory_region *mem)
{
return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem);
}
void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old)
+ const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new)
{
- kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old);
+ kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new);
}
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 1a4acf8..dab68b7 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -650,7 +650,7 @@ static void kvmppc_rmap_reset(struct kvm *kvm)
int srcu_idx;
srcu_idx = srcu_read_lock(&kvm->srcu);
- slots = kvm->memslots;
+ slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
/*
* This assumes it is acceptable to lose reference and
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 48d3c5d..68d067a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1952,7 +1952,7 @@ static void post_guest_process(struct kvmppc_vcore *vc)
*/
static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
{
- struct kvm_vcpu *vcpu;
+ struct kvm_vcpu *vcpu, *vnext;
int i;
int srcu_idx;
@@ -1982,7 +1982,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
if ((threads_per_core > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+ list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+ arch.run_list) {
vcpu->arch.ret = -EBUSY;
kvmppc_remove_runnable(vc, vcpu);
wake_up(&vcpu->arch.cpu_run);
@@ -2320,6 +2321,7 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
struct kvm_dirty_log *log)
{
+ struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int r;
unsigned long n;
@@ -2330,7 +2332,8 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
if (log->slot >= KVM_USER_MEM_SLOTS)
goto out;
- memslot = id_to_memslot(kvm->memslots, log->slot);
+ slots = kvm_memslots(kvm);
+ memslot = id_to_memslot(slots, log->slot);
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
@@ -2373,16 +2376,18 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem)
+ const struct kvm_userspace_memory_region *mem)
{
return 0;
}
static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old)
+ const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new)
{
unsigned long npages = mem->memory_size >> PAGE_SHIFT;
+ struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
if (npages && old->npages) {
@@ -2392,7 +2397,8 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
* since the rmap array starts out as all zeroes,
* i.e. no pages are dirty.
*/
- memslot = id_to_memslot(kvm->memslots, mem->slot);
+ slots = kvm_memslots(kvm);
+ memslot = id_to_memslot(slots, mem->slot);
kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
}
}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index f573839..64891b0 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1530,6 +1530,7 @@ out:
static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
struct kvm_dirty_log *log)
{
+ struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
struct kvm_vcpu *vcpu;
ulong ga, ga_end;
@@ -1545,7 +1546,8 @@ static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
/* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) {
- memslot = id_to_memslot(kvm->memslots, log->slot);
+ slots = kvm_memslots(kvm);
+ memslot = id_to_memslot(slots, log->slot);
ga = memslot->base_gfn << PAGE_SHIFT;
ga_end = ga + (memslot->npages << PAGE_SHIFT);
@@ -1571,14 +1573,15 @@ static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem)
+ const struct kvm_userspace_memory_region *mem)
{
return 0;
}
static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old)
+ const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new)
{
return;
}
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 3872ab3..cc58426 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1784,14 +1784,15 @@ int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem)
+ const struct kvm_userspace_memory_region *mem)
{
return 0;
}
void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old)
+ const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new)
{
}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8cd1f80..e5dde32 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -595,18 +595,19 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- kvmppc_core_commit_memory_region(kvm, mem, old);
+ kvmppc_core_commit_memory_region(kvm, mem, old, new);
}
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 444c412..3024acb 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -636,7 +636,7 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d461f8a..71530a4 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -36,6 +36,10 @@
#include "kvm-s390.h"
#include "gaccess.h"
+#define KMSG_COMPONENT "kvm-s390"
+#undef pr_fmt
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
@@ -236,6 +240,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
{
int r;
unsigned long n;
+ struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int is_dirty = 0;
@@ -245,7 +250,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
if (log->slot >= KVM_USER_MEM_SLOTS)
goto out;
- memslot = id_to_memslot(kvm->memslots, log->slot);
+ slots = kvm_memslots(kvm);
+ memslot = id_to_memslot(slots, log->slot);
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
@@ -1417,6 +1423,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+ exit_sie(vcpu);
}
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
@@ -1427,6 +1434,7 @@ void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
atomic_set_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+ exit_sie(vcpu);
}
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
@@ -1450,7 +1458,6 @@ void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
kvm_make_request(req, vcpu);
kvm_s390_vcpu_request(vcpu);
- exit_sie(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
@@ -2087,7 +2094,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
kvm_s390_vcpu_start(vcpu);
} else if (is_vcpu_stopped(vcpu)) {
- pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
+ pr_err_ratelimited("can't run stopped vcpu %d\n",
vcpu->vcpu_id);
return -EINVAL;
}
@@ -2580,7 +2587,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
/* A few sanity checks. We can have memory slots which have to be
@@ -2598,8 +2605,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
int rc;
@@ -2618,7 +2626,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
mem->guest_phys_addr, mem->memory_size);
if (rc)
- printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
+ pr_warn("failed to commit memory region\n");
return;
}
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 57a9d94..e16466e 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -193,6 +193,8 @@ struct x86_emulate_ops {
int (*cpl)(struct x86_emulate_ctxt *ctxt);
int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
+ u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt);
+ void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase);
int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc);
@@ -262,6 +264,11 @@ enum x86emul_mode {
X86EMUL_MODE_PROT64, /* 64-bit (long) mode. */
};
+/* These match some of the HF_* flags defined in kvm_host.h */
+#define X86EMUL_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
+#define X86EMUL_SMM_MASK (1 << 6)
+#define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7)
+
struct x86_emulate_ctxt {
const struct x86_emulate_ops *ops;
@@ -273,8 +280,8 @@ struct x86_emulate_ctxt {
/* interruptibility state, as a result of execution of STI or MOV SS */
int interruptibility;
+ int emul_flags;
- bool guest_mode; /* guest running a nested guest */
bool perm_ok; /* do not check permissions if true */
bool ud; /* inject an #UD if host doesn't support insn */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bbb8f4e..8ca32cf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -184,23 +184,12 @@ struct kvm_mmu_memory_cache {
void *objects[KVM_NR_MEM_OBJS];
};
-/*
- * kvm_mmu_page_role, below, is defined as:
- *
- * bits 0:3 - total guest paging levels (2-4, or zero for real mode)
- * bits 4:7 - page table level for this shadow (1-4)
- * bits 8:9 - page table quadrant for 2-level guests
- * bit 16 - direct mapping of virtual to physical mapping at gfn
- * used for real mode and two-dimensional paging
- * bits 17:19 - common access permissions for all ptes in this shadow page
- */
union kvm_mmu_page_role {
unsigned word;
struct {
unsigned level:4;
unsigned cr4_pae:1;
unsigned quadrant:2;
- unsigned pad_for_nice_hex_output:6;
unsigned direct:1;
unsigned access:3;
unsigned invalid:1;
@@ -208,6 +197,15 @@ union kvm_mmu_page_role {
unsigned cr0_wp:1;
unsigned smep_andnot_wp:1;
unsigned smap_andnot_wp:1;
+ unsigned :8;
+
+ /*
+ * This is left at the top of the word so that
+ * kvm_memslots_for_spte_role can extract it with a
+ * simple shift. While there is room, give it a whole
+ * byte so it is also faster to load it from memory.
+ */
+ unsigned smm:8;
};
};
@@ -368,6 +366,7 @@ struct kvm_vcpu_arch {
int32_t apic_arb_prio;
int mp_state;
u64 ia32_misc_enable_msr;
+ u64 smbase;
bool tpr_access_reporting;
u64 ia32_xss;
@@ -401,6 +400,7 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_page_header_cache;
struct fpu guest_fpu;
+ bool eager_fpu;
u64 xcr0;
u64 guest_supported_xcr0;
u32 guest_xstate_size;
@@ -470,6 +470,7 @@ struct kvm_vcpu_arch {
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
unsigned nmi_pending; /* NMI queued after currently running handler */
bool nmi_injected; /* Trying to inject an NMI this entry */
+ bool smi_pending; /* SMI queued after currently running handler */
struct mtrr_state_type mtrr_state;
u64 pat;
@@ -708,6 +709,7 @@ struct kvm_x86_ops {
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
bool (*cpu_has_accelerated_tpr)(void);
+ bool (*cpu_has_high_real_mode_segbase)(void);
void (*cpuid_update)(struct kvm_vcpu *vcpu);
/* Create, but do not attach this VCPU */
@@ -720,7 +722,7 @@ struct kvm_x86_ops {
void (*vcpu_put)(struct kvm_vcpu *vcpu);
void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu);
- int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
+ int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
void (*get_segment)(struct kvm_vcpu *vcpu,
@@ -747,6 +749,7 @@ struct kvm_x86_ops {
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+ void (*fpu_activate)(struct kvm_vcpu *vcpu);
void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
void (*tlb_flush)(struct kvm_vcpu *vcpu);
@@ -872,7 +875,7 @@ void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
struct kvm_memory_slot *memslot);
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
- struct kvm_memory_slot *memslot);
+ const struct kvm_memory_slot *memslot);
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
struct kvm_memory_slot *memslot);
void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
@@ -883,7 +886,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
-void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
+void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots);
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
@@ -939,7 +942,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
-int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
+int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
struct x86_emulate_ctxt;
@@ -968,7 +971,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
-int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
@@ -1113,6 +1116,14 @@ enum {
#define HF_NMI_MASK (1 << 3)
#define HF_IRET_MASK (1 << 4)
#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
+#define HF_SMM_MASK (1 << 6)
+#define HF_SMM_INSIDE_NMI_MASK (1 << 7)
+
+#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+#define KVM_ADDRESS_SPACE_NUM 2
+
+#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
+#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
/*
* Hardware virtualization extension instructions may fault if a
@@ -1183,4 +1194,9 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
+int __x86_set_memory_region(struct kvm *kvm,
+ const struct kvm_userspace_memory_region *mem);
+int x86_set_memory_region(struct kvm *kvm,
+ const struct kvm_userspace_memory_region *mem);
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 6167fd7..655e07a 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -41,5 +41,6 @@ struct pvclock_wall_clock {
#define PVCLOCK_TSC_STABLE_BIT (1 << 0)
#define PVCLOCK_GUEST_STOPPED (1 << 1)
+#define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 2fec75e..a4ae82e 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -106,6 +106,8 @@ struct kvm_ioapic_state {
#define KVM_IRQCHIP_IOAPIC 2
#define KVM_NR_IRQCHIPS 3
+#define KVM_RUN_X86_SMM (1 << 0)
+
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
@@ -281,6 +283,7 @@ struct kvm_reinject_control {
#define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001
#define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002
#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004
+#define KVM_VCPUEVENT_VALID_SMM 0x00000008
/* Interrupt shadow states */
#define KVM_X86_SHADOW_INT_MOV_SS 0x01
@@ -309,7 +312,13 @@ struct kvm_vcpu_events {
} nmi;
__u32 sipi_vector;
__u32 flags;
- __u32 reserved[10];
+ struct {
+ __u8 smm;
+ __u8 pending;
+ __u8 smm_inside_nmi;
+ __u8 latched_init;
+ } smi;
+ __u32 reserved[9];
};
/* for KVM_GET/SET_DEBUGREGS */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9435620..cc34cec 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -331,7 +331,7 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
apic_write(APIC_EOI, APIC_EOI_ACK);
}
-void kvm_guest_cpu_init(void)
+static void kvm_guest_cpu_init(void)
{
if (!kvm_para_available())
return;
@@ -655,7 +655,7 @@ static inline void spin_time_accum_blocked(u64 start)
static struct dentry *d_spin_debug;
static struct dentry *d_kvm_debug;
-struct dentry *kvm_init_debugfs(void)
+static struct dentry *kvm_init_debugfs(void)
{
d_kvm_debug = debugfs_create_dir("kvm-guest", NULL);
if (!d_kvm_debug)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 42caaef..49487b4 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -24,6 +24,7 @@
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/memblock.h>
+#include <linux/sched.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
@@ -217,8 +218,10 @@ static void kvm_shutdown(void)
void __init kvmclock_init(void)
{
+ struct pvclock_vcpu_time_info *vcpu_time;
unsigned long mem;
- int size;
+ int size, cpu;
+ u8 flags;
size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
@@ -264,7 +267,14 @@ void __init kvmclock_init(void)
pv_info.name = "KVM";
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
- pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
+ pvclock_set_flags(~0);
+
+ cpu = get_cpu();
+ vcpu_time = &hv_clock[cpu].pvti;
+ flags = pvclock_read_flags(vcpu_time);
+ if (flags & PVCLOCK_COUNTS_FROM_ZERO)
+ set_sched_clock_stable();
+ put_cpu();
}
int __init kvm_setup_vsyscall_timeinfo(void)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 413a7bf..d8a1d56 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -86,15 +86,16 @@ config KVM_MMU_AUDIT
auditing of KVM MMU events at runtime.
config KVM_DEVICE_ASSIGNMENT
- bool "KVM legacy PCI device assignment support"
+ bool "KVM legacy PCI device assignment support (DEPRECATED)"
depends on KVM && PCI && IOMMU_API
- default y
+ default n
---help---
Provide support for legacy PCI device assignment through KVM. The
kernel now also supports a full featured userspace device driver
- framework through VFIO, which supersedes much of this support.
+ framework through VFIO, which supersedes this support and provides
+ better security.
- If unsure, say Y.
+ If unsure, say N.
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 59b69f6..9dadf8d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,6 +16,8 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
+#include <asm/i387.h> /* For use_eager_fpu. Ugh! */
+#include <asm/fpu-internal.h> /* For use_eager_fpu. Ugh! */
#include <asm/user.h>
#include <asm/xsave.h>
#include "cpuid.h"
@@ -95,6 +97,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
+ vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu);
+
/*
* The existing code assumes virtual address is 48-bit in the canonical
* address checks; exit if it is ever changed.
@@ -411,6 +415,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
break;
}
+ case 6: /* Thermal management */
+ entry->eax = 0x4; /* allow ARAT */
+ entry->ebx = 0;
+ entry->ecx = 0;
+ entry->edx = 0;
+ break;
case 7: {
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* Mask ebx against host capability word 9 */
@@ -587,7 +597,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
break;
case 3: /* Processor serial number */
case 5: /* MONITOR/MWAIT */
- case 6: /* Thermal management */
case 0xC0000002:
case 0xC0000003:
case 0xC0000004:
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c3b1ad9..dd05b9c 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -70,6 +70,14 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
}
+static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+ return best && (best->edx & bit(X86_FEATURE_LM));
+}
+
static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -117,4 +125,12 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->ebx & bit(X86_FEATURE_RTM));
}
+
+static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->ebx & bit(X86_FEATURE_MPX));
+}
#endif
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 9b655d1..e7a4fde 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2259,6 +2259,260 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
return rc;
}
+static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
+{
+ u32 eax, ebx, ecx, edx;
+
+ eax = 0x80000001;
+ ecx = 0;
+ ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+ return edx & bit(X86_FEATURE_LM);
+}
+
+#define GET_SMSTATE(type, smbase, offset) \
+ ({ \
+ type __val; \
+ int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val, \
+ sizeof(__val), NULL); \
+ if (r != X86EMUL_CONTINUE) \
+ return X86EMUL_UNHANDLEABLE; \
+ __val; \
+ })
+
+static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
+{
+ desc->g = (flags >> 23) & 1;
+ desc->d = (flags >> 22) & 1;
+ desc->l = (flags >> 21) & 1;
+ desc->avl = (flags >> 20) & 1;
+ desc->p = (flags >> 15) & 1;
+ desc->dpl = (flags >> 13) & 3;
+ desc->s = (flags >> 12) & 1;
+ desc->type = (flags >> 8) & 15;
+}
+
+static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+{
+ struct desc_struct desc;
+ int offset;
+ u16 selector;
+
+ selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);
+
+ if (n < 3)
+ offset = 0x7f84 + n * 12;
+ else
+ offset = 0x7f2c + (n - 3) * 12;
+
+ set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
+ ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
+ return X86EMUL_CONTINUE;
+}
+
+static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+{
+ struct desc_struct desc;
+ int offset;
+ u16 selector;
+ u32 base3;
+
+ offset = 0x7e00 + n * 16;
+
+ selector = GET_SMSTATE(u16, smbase, offset);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
+ set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
+ base3 = GET_SMSTATE(u32, smbase, offset + 12);
+
+ ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
+ return X86EMUL_CONTINUE;
+}
+
+static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
+ u64 cr0, u64 cr4)
+{
+ int bad;
+
+ /*
+ * First enable PAE, long mode needs it before CR0.PG = 1 is set.
+ * Then enable protected mode. However, PCID cannot be enabled
+ * if EFER.LMA=0, so set it separately.
+ */
+ bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+
+ bad = ctxt->ops->set_cr(ctxt, 0, cr0);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+
+ if (cr4 & X86_CR4_PCIDE) {
+ bad = ctxt->ops->set_cr(ctxt, 4, cr4);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ return X86EMUL_CONTINUE;
+}
+
+static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
+{
+ struct desc_struct desc;
+ struct desc_ptr dt;
+ u16 selector;
+ u32 val, cr0, cr4;
+ int i;
+
+ cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
+ ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
+ ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
+ ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
+
+ for (i = 0; i < 8; i++)
+ *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);
+
+ val = GET_SMSTATE(u32, smbase, 0x7fcc);
+ ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
+ val = GET_SMSTATE(u32, smbase, 0x7fc8);
+ ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+
+ selector = GET_SMSTATE(u32, smbase, 0x7fc4);
+ set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
+ ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
+
+ selector = GET_SMSTATE(u32, smbase, 0x7fc0);
+ set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
+ ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
+
+ dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
+ dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
+ ctxt->ops->set_gdt(ctxt, &dt);
+
+ dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
+ dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
+ ctxt->ops->set_idt(ctxt, &dt);
+
+ for (i = 0; i < 6; i++) {
+ int r = rsm_load_seg_32(ctxt, smbase, i);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+ }
+
+ cr4 = GET_SMSTATE(u32, smbase, 0x7f14);
+
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
+
+ return rsm_enter_protected_mode(ctxt, cr0, cr4);
+}
+
+static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
+{
+ struct desc_struct desc;
+ struct desc_ptr dt;
+ u64 val, cr0, cr4;
+ u32 base3;
+ u16 selector;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
+
+ ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
+ ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
+
+ val = GET_SMSTATE(u32, smbase, 0x7f68);
+ ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
+ val = GET_SMSTATE(u32, smbase, 0x7f60);
+ ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+
+ cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
+ ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
+ cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
+ val = GET_SMSTATE(u64, smbase, 0x7ed0);
+ ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
+
+ selector = GET_SMSTATE(u32, smbase, 0x7e90);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
+ set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
+ base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
+ ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
+
+ dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
+ dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
+ ctxt->ops->set_idt(ctxt, &dt);
+
+ selector = GET_SMSTATE(u32, smbase, 0x7e70);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
+ set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
+ base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
+ ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
+
+ dt.size = GET_SMSTATE(u32, smbase, 0x7e64);
+ dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
+ ctxt->ops->set_gdt(ctxt, &dt);
+
+ for (i = 0; i < 6; i++) {
+ int r = rsm_load_seg_64(ctxt, smbase, i);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+ }
+
+ return rsm_enter_protected_mode(ctxt, cr0, cr4);
+}
+
+static int em_rsm(struct x86_emulate_ctxt *ctxt)
+{
+ unsigned long cr0, cr4, efer;
+ u64 smbase;
+ int ret;
+
+ if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
+ return emulate_ud(ctxt);
+
+ /*
+ * Get back to real mode, to prepare a safe state in which to load
+ * CR0/CR3/CR4/EFER. Also this will ensure that addresses passed
+ * to read_std/write_std are not virtual.
+ *
+ * CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
+ */
+ cr0 = ctxt->ops->get_cr(ctxt, 0);
+ if (cr0 & X86_CR0_PE)
+ ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if (cr4 & X86_CR4_PAE)
+ ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
+ efer = 0;
+ ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+
+ smbase = ctxt->ops->get_smbase(ctxt);
+ if (emulator_has_longmode(ctxt))
+ ret = rsm_load_state_64(ctxt, smbase + 0x8000);
+ else
+ ret = rsm_load_state_32(ctxt, smbase + 0x8000);
+
+ if (ret != X86EMUL_CONTINUE) {
+ /* FIXME: should triple fault */
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
+ ctxt->ops->set_nmi_mask(ctxt, false);
+
+ ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK;
+ ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
+ return X86EMUL_CONTINUE;
+}
+
static void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
struct desc_struct *cs, struct desc_struct *ss)
@@ -4197,7 +4451,7 @@ static const struct opcode twobyte_table[256] = {
F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
/* 0xA8 - 0xAF */
I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
- DI(ImplicitOps, rsm),
+ II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm),
F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
@@ -4895,7 +5149,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
fetch_possible_mmx_operand(ctxt, &ctxt->dst);
}
- if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
+ if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_PRE_EXCEPT);
if (rc != X86EMUL_CONTINUE)
@@ -4924,7 +5178,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
- if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
+ if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_EXCEPT);
if (rc != X86EMUL_CONTINUE)
@@ -4978,7 +5232,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
special_insn:
- if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
+ if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_MEMACCESS);
if (rc != X86EMUL_CONTINUE)
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 544076c..e1e89ee 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -99,4 +99,9 @@ static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
return vcpu->arch.hflags & HF_GUEST_MASK;
}
+static inline bool is_smm(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.hflags & HF_SMM_MASK;
+}
+
#endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index dc5b57b..beeef05 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -240,6 +240,15 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
recalculate_apic_map(apic->vcpu->kvm);
}
+static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u8 id)
+{
+ u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
+
+ apic_set_reg(apic, APIC_ID, id << 24);
+ apic_set_reg(apic, APIC_LDR, ldr);
+ recalculate_apic_map(apic->vcpu->kvm);
+}
+
static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
@@ -799,7 +808,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
break;
case APIC_DM_SMI:
- apic_debug("Ignoring guest SMI\n");
+ result = 1;
+ kvm_make_request(KVM_REQ_SMI, vcpu);
+ kvm_vcpu_kick(vcpu);
break;
case APIC_DM_NMI:
@@ -1538,9 +1549,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
if ((old_value ^ value) & X2APIC_ENABLE) {
if (value & X2APIC_ENABLE) {
- u32 id = kvm_apic_id(apic);
- u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
- kvm_apic_set_ldr(apic, ldr);
+ kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
} else
kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
@@ -1587,7 +1596,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
apic_set_reg(apic, APIC_DFR, 0xffffffffU);
apic_set_spiv(apic, 0xff);
apic_set_reg(apic, APIC_TASKPRI, 0);
- kvm_apic_set_ldr(apic, 0);
+ if (!apic_x2apic_mode(apic))
+ kvm_apic_set_ldr(apic, 0);
apic_set_reg(apic, APIC_ESR, 0);
apic_set_reg(apic, APIC_ICR, 0);
apic_set_reg(apic, APIC_ICR2, 0);
@@ -2047,8 +2057,19 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events)
return;
- pe = xchg(&apic->pending_events, 0);
+ /*
+ * INITs are latched while in SMM. Because an SMM CPU cannot
+ * be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
+ * and delay processing of INIT until the next RSM.
+ */
+ if (is_smm(vcpu)) {
+ WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
+ if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
+ clear_bit(KVM_APIC_SIPI, &apic->pending_events);
+ return;
+ }
+ pe = xchg(&apic->pending_events, 0);
if (test_bit(KVM_APIC_INIT, &pe)) {
kvm_lapic_reset(vcpu, true);
kvm_vcpu_reset(vcpu, true);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 71b150c..f2f4e10 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -150,7 +150,7 @@ static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.apic->pending_events;
+ return kvm_vcpu_has_lapic(vcpu) && vcpu->arch.apic->pending_events;
}
static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
@@ -159,6 +159,11 @@ static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
irq->msi_redir_hint);
}
+static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+}
+
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
void wait_lapic_expire(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 49c34e6..c88f0e4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -223,15 +223,15 @@ static unsigned int get_mmio_spte_generation(u64 spte)
return gen;
}
-static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
+static unsigned int kvm_current_mmio_generation(struct kvm_vcpu *vcpu)
{
- return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
+ return kvm_vcpu_memslots(vcpu)->generation & MMIO_GEN_MASK;
}
-static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
+static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
unsigned access)
{
- unsigned int gen = kvm_current_mmio_generation(kvm);
+ unsigned int gen = kvm_current_mmio_generation(vcpu);
u64 mask = generation_mmio_spte_mask(gen);
access &= ACC_WRITE_MASK | ACC_USER_MASK;
@@ -258,22 +258,22 @@ static unsigned get_mmio_spte_access(u64 spte)
return (spte & ~mask) & ~PAGE_MASK;
}
-static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
+static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
pfn_t pfn, unsigned access)
{
if (unlikely(is_noslot_pfn(pfn))) {
- mark_mmio_spte(kvm, sptep, gfn, access);
+ mark_mmio_spte(vcpu, sptep, gfn, access);
return true;
}
return false;
}
-static bool check_mmio_spte(struct kvm *kvm, u64 spte)
+static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
{
unsigned int kvm_gen, spte_gen;
- kvm_gen = kvm_current_mmio_generation(kvm);
+ kvm_gen = kvm_current_mmio_generation(vcpu);
spte_gen = get_mmio_spte_generation(spte);
trace_check_mmio_spte(spte, kvm_gen, spte_gen);
@@ -804,13 +804,17 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
return &slot->arch.lpage_info[level - 2][idx];
}
-static void account_shadowed(struct kvm *kvm, gfn_t gfn)
+static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
{
+ struct kvm_memslots *slots;
struct kvm_memory_slot *slot;
struct kvm_lpage_info *linfo;
+ gfn_t gfn;
int i;
- slot = gfn_to_memslot(kvm, gfn);
+ gfn = sp->gfn;
+ slots = kvm_memslots_for_spte_role(kvm, sp->role);
+ slot = __gfn_to_memslot(slots, gfn);
for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
linfo = lpage_info_slot(gfn, slot, i);
linfo->write_count += 1;
@@ -818,13 +822,17 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
kvm->arch.indirect_shadow_pages++;
}
-static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
+static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
{
+ struct kvm_memslots *slots;
struct kvm_memory_slot *slot;
struct kvm_lpage_info *linfo;
+ gfn_t gfn;
int i;
- slot = gfn_to_memslot(kvm, gfn);
+ gfn = sp->gfn;
+ slots = kvm_memslots_for_spte_role(kvm, sp->role);
+ slot = __gfn_to_memslot(slots, gfn);
for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
linfo = lpage_info_slot(gfn, slot, i);
linfo->write_count -= 1;
@@ -833,14 +841,14 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
kvm->arch.indirect_shadow_pages--;
}
-static int has_wrprotected_page(struct kvm *kvm,
+static int has_wrprotected_page(struct kvm_vcpu *vcpu,
gfn_t gfn,
int level)
{
struct kvm_memory_slot *slot;
struct kvm_lpage_info *linfo;
- slot = gfn_to_memslot(kvm, gfn);
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
if (slot) {
linfo = lpage_info_slot(gfn, slot, level);
return linfo->write_count;
@@ -872,7 +880,7 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
{
struct kvm_memory_slot *slot;
- slot = gfn_to_memslot(vcpu->kvm, gfn);
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
if (!slot || slot->flags & KVM_MEMSLOT_INVALID ||
(no_dirty_log && slot->dirty_bitmap))
slot = NULL;
@@ -897,7 +905,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
- if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
+ if (has_wrprotected_page(vcpu, large_gfn, level))
break;
return level - 1;
@@ -1039,12 +1047,14 @@ static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
/*
* Take gfn and return the reverse mapping to it.
*/
-static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
+static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, struct kvm_mmu_page *sp)
{
+ struct kvm_memslots *slots;
struct kvm_memory_slot *slot;
- slot = gfn_to_memslot(kvm, gfn);
- return __gfn_to_rmap(gfn, level, slot);
+ slots = kvm_memslots_for_spte_role(kvm, sp->role);
+ slot = __gfn_to_memslot(slots, gfn);
+ return __gfn_to_rmap(gfn, sp->role.level, slot);
}
static bool rmap_can_add(struct kvm_vcpu *vcpu)
@@ -1062,7 +1072,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
sp = page_header(__pa(spte));
kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
- rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
+ rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp);
return pte_list_add(vcpu, spte, rmapp);
}
@@ -1074,7 +1084,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
sp = page_header(__pa(spte));
gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
- rmapp = gfn_to_rmap(kvm, gfn, sp->role.level);
+ rmapp = gfn_to_rmap(kvm, gfn, sp);
pte_list_remove(spte, rmapp);
}
@@ -1332,18 +1342,18 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}
-static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
+static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
{
struct kvm_memory_slot *slot;
unsigned long *rmapp;
int i;
bool write_protected = false;
- slot = gfn_to_memslot(kvm, gfn);
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
rmapp = __gfn_to_rmap(gfn, i, slot);
- write_protected |= __rmap_write_protect(kvm, rmapp, true);
+ write_protected |= __rmap_write_protect(vcpu->kvm, rmapp, true);
}
return write_protected;
@@ -1499,30 +1509,33 @@ static int kvm_handle_hva_range(struct kvm *kvm,
struct kvm_memory_slot *memslot;
struct slot_rmap_walk_iterator iterator;
int ret = 0;
+ int i;
- slots = kvm_memslots(kvm);
-
- kvm_for_each_memslot(memslot, slots) {
- unsigned long hva_start, hva_end;
- gfn_t gfn_start, gfn_end;
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ slots = __kvm_memslots(kvm, i);
+ kvm_for_each_memslot(memslot, slots) {
+ unsigned long hva_start, hva_end;
+ gfn_t gfn_start, gfn_end;
- hva_start = max(start, memslot->userspace_addr);
- hva_end = min(end, memslot->userspace_addr +
- (memslot->npages << PAGE_SHIFT));
- if (hva_start >= hva_end)
- continue;
- /*
- * {gfn(page) | page intersects with [hva_start, hva_end)} =
- * {gfn_start, gfn_start+1, ..., gfn_end-1}.
- */
- gfn_start = hva_to_gfn_memslot(hva_start, memslot);
- gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
-
- for_each_slot_rmap_range(memslot, PT_PAGE_TABLE_LEVEL,
- PT_MAX_HUGEPAGE_LEVEL, gfn_start, gfn_end - 1,
- &iterator)
- ret |= handler(kvm, iterator.rmap, memslot,
- iterator.gfn, iterator.level, data);
+ hva_start = max(start, memslot->userspace_addr);
+ hva_end = min(end, memslot->userspace_addr +
+ (memslot->npages << PAGE_SHIFT));
+ if (hva_start >= hva_end)
+ continue;
+ /*
+ * {gfn(page) | page intersects with [hva_start, hva_end)} =
+ * {gfn_start, gfn_start+1, ..., gfn_end-1}.
+ */
+ gfn_start = hva_to_gfn_memslot(hva_start, memslot);
+ gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+
+ for_each_slot_rmap_range(memslot, PT_PAGE_TABLE_LEVEL,
+ PT_MAX_HUGEPAGE_LEVEL,
+ gfn_start, gfn_end - 1,
+ &iterator)
+ ret |= handler(kvm, iterator.rmap, memslot,
+ iterator.gfn, iterator.level, data);
+ }
}
return ret;
@@ -1608,7 +1621,7 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
sp = page_header(__pa(spte));
- rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
+ rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp);
kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0);
kvm_flush_remote_tlbs(vcpu->kvm);
@@ -2028,7 +2041,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
bool protected = false;
for_each_sp(pages, sp, parents, i)
- protected |= rmap_write_protect(vcpu->kvm, sp->gfn);
+ protected |= rmap_write_protect(vcpu, sp->gfn);
if (protected)
kvm_flush_remote_tlbs(vcpu->kvm);
@@ -2126,12 +2139,12 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
hlist_add_head(&sp->hash_link,
&vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
if (!direct) {
- if (rmap_write_protect(vcpu->kvm, gfn))
+ if (rmap_write_protect(vcpu, gfn))
kvm_flush_remote_tlbs(vcpu->kvm);
if (level > PT_PAGE_TABLE_LEVEL && need_sync)
kvm_sync_pages(vcpu, gfn);
- account_shadowed(vcpu->kvm, gfn);
+ account_shadowed(vcpu->kvm, sp);
}
sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
init_shadow_page_table(sp);
@@ -2312,7 +2325,7 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
kvm_mmu_unlink_parents(kvm, sp);
if (!sp->role.invalid && !sp->role.direct)
- unaccount_shadowed(kvm, sp->gfn);
+ unaccount_shadowed(kvm, sp);
if (sp->unsync)
kvm_unlink_unsync_page(kvm, sp);
@@ -2577,7 +2590,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
u64 spte;
int ret = 0;
- if (set_mmio_spte(vcpu->kvm, sptep, gfn, pfn, pte_access))
+ if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
return 0;
spte = PT_PRESENT_MASK;
@@ -2614,7 +2627,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
* be fixed if guest refault.
*/
if (level > PT_PAGE_TABLE_LEVEL &&
- has_wrprotected_page(vcpu->kvm, gfn, level))
+ has_wrprotected_page(vcpu, gfn, level))
goto done;
spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
@@ -2638,7 +2651,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
}
if (pte_access & ACC_WRITE_MASK) {
- mark_page_dirty(vcpu->kvm, gfn);
+ kvm_vcpu_mark_page_dirty(vcpu, gfn);
spte |= shadow_dirty_mask;
}
@@ -2728,15 +2741,17 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
u64 *start, u64 *end)
{
struct page *pages[PTE_PREFETCH_NUM];
+ struct kvm_memory_slot *slot;
unsigned access = sp->role.access;
int i, ret;
gfn_t gfn;
gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt);
- if (!gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK))
+ slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK);
+ if (!slot)
return -1;
- ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start);
+ ret = gfn_to_page_many_atomic(slot, gfn, pages, end - start);
if (ret <= 0)
return -1;
@@ -2854,7 +2869,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn)
return 1;
if (pfn == KVM_PFN_ERR_HWPOISON) {
- kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current);
+ kvm_send_hwpoison_signal(kvm_vcpu_gfn_to_hva(vcpu, gfn), current);
return 0;
}
@@ -2877,7 +2892,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
level == PT_PAGE_TABLE_LEVEL &&
PageTransCompound(pfn_to_page(pfn)) &&
- !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) {
+ !has_wrprotected_page(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
unsigned long mask;
/*
* mmu_notifier_retry was successful and we hold the
@@ -2969,7 +2984,7 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
* Compare with set_spte where instead shadow_dirty_mask is set.
*/
if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte)
- mark_page_dirty(vcpu->kvm, gfn);
+ kvm_vcpu_mark_page_dirty(vcpu, gfn);
return true;
}
@@ -3424,7 +3439,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
gfn_t gfn = get_mmio_spte_gfn(spte);
unsigned access = get_mmio_spte_access(spte);
- if (!check_mmio_spte(vcpu->kvm, spte))
+ if (!check_mmio_spte(vcpu, spte))
return RET_MMIO_PF_INVALID;
if (direct)
@@ -3496,7 +3511,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
arch.direct_map = vcpu->arch.mmu.direct_map;
arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
- return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch);
+ return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
}
static bool can_do_async_pf(struct kvm_vcpu *vcpu)
@@ -3514,7 +3529,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
struct kvm_memory_slot *slot;
bool async;
- slot = gfn_to_memslot(vcpu->kvm, gfn);
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
async = false;
*pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
if (!async)
@@ -3627,7 +3642,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
vcpu->arch.mmu.inject_page_fault(vcpu, fault);
}
-static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
+static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
unsigned access, int *nr_present)
{
if (unlikely(is_mmio_spte(*sptep))) {
@@ -3637,7 +3652,7 @@ static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
}
(*nr_present)++;
- mark_mmio_spte(kvm, sptep, gfn, access);
+ mark_mmio_spte(vcpu, sptep, gfn, access);
return true;
}
@@ -3915,6 +3930,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
struct kvm_mmu *context = &vcpu->arch.mmu;
context->base_role.word = 0;
+ context->base_role.smm = is_smm(vcpu);
context->page_fault = tdp_page_fault;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
@@ -3976,6 +3992,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
= smep && !is_write_protection(vcpu);
context->base_role.smap_andnot_wp
= smap && !is_write_protection(vcpu);
+ context->base_role.smm = is_smm(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
@@ -4147,7 +4164,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
*gpa &= ~(gpa_t)7;
*bytes = 8;
- r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8);
+ r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8);
if (r)
gentry = 0;
new = (const u8 *)&gentry;
@@ -4252,13 +4269,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
u64 entry, gentry, *spte;
int npte;
bool remote_flush, local_flush, zap_page;
- union kvm_mmu_page_role mask = (union kvm_mmu_page_role) {
- .cr0_wp = 1,
- .cr4_pae = 1,
- .nxe = 1,
- .smep_andnot_wp = 1,
- .smap_andnot_wp = 1,
- };
+ union kvm_mmu_page_role mask = { };
+
+ mask.cr0_wp = 1;
+ mask.cr4_pae = 1;
+ mask.nxe = 1;
+ mask.smep_andnot_wp = 1;
+ mask.smap_andnot_wp = 1;
+ mask.smm = 1;
/*
* If we don't have indirect shadow pages, it means no page is
@@ -4530,21 +4548,23 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
-
- slots = kvm_memslots(kvm);
+ int i;
spin_lock(&kvm->mmu_lock);
- kvm_for_each_memslot(memslot, slots) {
- gfn_t start, end;
-
- start = max(gfn_start, memslot->base_gfn);
- end = min(gfn_end, memslot->base_gfn + memslot->npages);
- if (start >= end)
- continue;
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ slots = __kvm_memslots(kvm, i);
+ kvm_for_each_memslot(memslot, slots) {
+ gfn_t start, end;
+
+ start = max(gfn_start, memslot->base_gfn);
+ end = min(gfn_end, memslot->base_gfn + memslot->npages);
+ if (start >= end)
+ continue;
- slot_handle_level_range(kvm, memslot, kvm_zap_rmapp,
- PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL,
- start, end - 1, true);
+ slot_handle_level_range(kvm, memslot, kvm_zap_rmapp,
+ PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL,
+ start, end - 1, true);
+ }
}
spin_unlock(&kvm->mmu_lock);
@@ -4621,10 +4641,12 @@ restart:
}
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
- struct kvm_memory_slot *memslot)
+ const struct kvm_memory_slot *memslot)
{
+ /* FIXME: const-ify all uses of struct kvm_memory_slot. */
spin_lock(&kvm->mmu_lock);
- slot_handle_leaf(kvm, memslot, kvm_mmu_zap_collapsible_spte, true);
+ slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
+ kvm_mmu_zap_collapsible_spte, true);
spin_unlock(&kvm->mmu_lock);
}
@@ -4771,13 +4793,13 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
}
-void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
+void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots)
{
/*
* The very rare case: if the generation-number is round,
* zap all shadow pages.
*/
- if (unlikely(kvm_current_mmio_generation(kvm) == 0)) {
+ if (unlikely((slots->generation & MMIO_GEN_MASK) == 0)) {
printk_ratelimited(KERN_DEBUG "kvm: zapping shadow pages for mmio generation wraparound\n");
kvm_mmu_invalidate_zap_all_pages(kvm);
}
@@ -4899,15 +4921,18 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
unsigned int nr_pages = 0;
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
+ int i;
- slots = kvm_memslots(kvm);
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ slots = __kvm_memslots(kvm, i);
- kvm_for_each_memslot(memslot, slots)
- nr_pages += memslot->npages;
+ kvm_for_each_memslot(memslot, slots)
+ nr_pages += memslot->npages;
+ }
nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
nr_mmu_pages = max(nr_mmu_pages,
- (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+ (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
return nr_mmu_pages;
}
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 368d534..a4f62e6 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -114,7 +114,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
return;
gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
- pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);
+ pfn = kvm_vcpu_gfn_to_pfn_atomic(vcpu, gfn);
if (is_error_pfn(pfn))
return;
@@ -131,12 +131,16 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
unsigned long *rmapp;
struct kvm_mmu_page *rev_sp;
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *slot;
gfn_t gfn;
rev_sp = page_header(__pa(sptep));
gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);
- if (!gfn_to_memslot(kvm, gfn)) {
+ slots = kvm_memslots_for_spte_role(kvm, rev_sp->role);
+ slot = __gfn_to_memslot(slots, gfn);
+ if (!slot) {
if (!__ratelimit(&ratelimit_state))
return;
audit_printk(kvm, "no memslot for gfn %llx\n", gfn);
@@ -146,7 +150,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
return;
}
- rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
+ rmapp = __gfn_to_rmap(gfn, rev_sp->role.level, slot);
if (!*rmapp) {
if (!__ratelimit(&ratelimit_state))
return;
@@ -191,11 +195,15 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
unsigned long *rmapp;
u64 *sptep;
struct rmap_iterator iter;
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *slot;
if (sp->role.direct || sp->unsync || sp->role.invalid)
return;
- rmapp = gfn_to_rmap(kvm, sp->gfn, PT_PAGE_TABLE_LEVEL);
+ slots = kvm_memslots_for_spte_role(kvm, sp->role);
+ slot = __gfn_to_memslot(slots, sp->gfn);
+ rmapp = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot);
for_each_rmap_spte(rmapp, &iter, sptep)
if (is_writable_pte(*sptep))
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6e6d115..0f67d7e 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -256,7 +256,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
if (ret)
return ret;
- mark_page_dirty(vcpu->kvm, table_gfn);
+ kvm_vcpu_mark_page_dirty(vcpu, table_gfn);
walker->ptes[level] = pte;
}
return 0;
@@ -338,7 +338,7 @@ retry_walk:
real_gfn = gpa_to_gfn(real_gfn);
- host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
+ host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, real_gfn,
&walker->pte_writable[walker->level - 1]);
if (unlikely(kvm_is_error_hva(host_addr)))
goto error;
@@ -511,11 +511,11 @@ static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
base_gpa = pte_gpa & ~mask;
index = (pte_gpa - base_gpa) / sizeof(pt_element_t);
- r = kvm_read_guest_atomic(vcpu->kvm, base_gpa,
+ r = kvm_vcpu_read_guest_atomic(vcpu, base_gpa,
gw->prefetch_ptes, sizeof(gw->prefetch_ptes));
curr_pte = gw->prefetch_ptes[index];
} else
- r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa,
+ r = kvm_vcpu_read_guest_atomic(vcpu, pte_gpa,
&curr_pte, sizeof(curr_pte));
return r || curr_pte != gw->ptes[level - 1];
@@ -869,8 +869,8 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
if (!rmap_can_add(vcpu))
break;
- if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
- sizeof(pt_element_t)))
+ if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
+ sizeof(pt_element_t)))
break;
FNAME(update_pte)(vcpu, sp, sptep, &gpte);
@@ -956,8 +956,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
pte_gpa = first_pte_gpa + i * sizeof(pt_element_t);
- if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
- sizeof(pt_element_t)))
+ if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
+ sizeof(pt_element_t)))
return -EINVAL;
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
@@ -970,7 +970,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
pte_access &= FNAME(gpte_access)(vcpu, gpte);
FNAME(protect_clean_gpte)(&pte_access, gpte);
- if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access,
+ if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
&nr_present))
continue;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8954d7a..6807531 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1955,8 +1955,8 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
u64 pdpte;
int ret;
- ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte,
- offset_in_page(cr3) + index * 8, 8);
+ ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
+ offset_in_page(cr3) + index * 8, 8);
if (ret)
return 0;
return pdpte;
@@ -2114,7 +2114,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
might_sleep();
- page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
+ page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT);
if (is_error_page(page))
goto error;
@@ -2153,7 +2153,7 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
mask = (0xf >> (4 - size)) << start_bit;
val = 0;
- if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, iopm_len))
+ if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
return NESTED_EXIT_DONE;
return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
@@ -2178,7 +2178,7 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
/* Offset is in 32 bit units but need in 8 bit units */
offset *= 4;
- if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4))
+ if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
return NESTED_EXIT_DONE;
return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
@@ -2449,7 +2449,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
p = msrpm_offsets[i];
offset = svm->nested.vmcb_msrpm + (p * 4);
- if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
+ if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
return false;
svm->nested.msrpm[p] = svm->msrpm[p] | value;
@@ -3069,42 +3069,42 @@ static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
svm_scale_tsc(vcpu, host_tsc);
}
-static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
+static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_svm *svm = to_svm(vcpu);
- switch (ecx) {
+ switch (msr_info->index) {
case MSR_IA32_TSC: {
- *data = svm->vmcb->control.tsc_offset +
+ msr_info->data = svm->vmcb->control.tsc_offset +
svm_scale_tsc(vcpu, native_read_tsc());
break;
}
case MSR_STAR:
- *data = svm->vmcb->save.star;
+ msr_info->data = svm->vmcb->save.star;
break;
#ifdef CONFIG_X86_64
case MSR_LSTAR:
- *data = svm->vmcb->save.lstar;
+ msr_info->data = svm->vmcb->save.lstar;
break;
case MSR_CSTAR:
- *data = svm->vmcb->save.cstar;
+ msr_info->data = svm->vmcb->save.cstar;
break;
case MSR_KERNEL_GS_BASE:
- *data = svm->vmcb->save.kernel_gs_base;
+ msr_info->data = svm->vmcb->save.kernel_gs_base;
break;
case MSR_SYSCALL_MASK:
- *data = svm->vmcb->save.sfmask;
+ msr_info->data = svm->vmcb->save.sfmask;
break;
#endif
case MSR_IA32_SYSENTER_CS:
- *data = svm->vmcb->save.sysenter_cs;
+ msr_info->data = svm->vmcb->save.sysenter_cs;
break;
case MSR_IA32_SYSENTER_EIP:
- *data = svm->sysenter_eip;
+ msr_info->data = svm->sysenter_eip;
break;
case MSR_IA32_SYSENTER_ESP:
- *data = svm->sysenter_esp;
+ msr_info->data = svm->sysenter_esp;
break;
/*
* Nobody will change the following 5 values in the VMCB so we can
@@ -3112,31 +3112,31 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
* implemented.
*/
case MSR_IA32_DEBUGCTLMSR:
- *data = svm->vmcb->save.dbgctl;
+ msr_info->data = svm->vmcb->save.dbgctl;
break;
case MSR_IA32_LASTBRANCHFROMIP:
- *data = svm->vmcb->save.br_from;
+ msr_info->data = svm->vmcb->save.br_from;
break;
case MSR_IA32_LASTBRANCHTOIP:
- *data = svm->vmcb->save.br_to;
+ msr_info->data = svm->vmcb->save.br_to;
break;
case MSR_IA32_LASTINTFROMIP:
- *data = svm->vmcb->save.last_excp_from;
+ msr_info->data = svm->vmcb->save.last_excp_from;
break;
case MSR_IA32_LASTINTTOIP:
- *data = svm->vmcb->save.last_excp_to;
+ msr_info->data = svm->vmcb->save.last_excp_to;
break;
case MSR_VM_HSAVE_PA:
- *data = svm->nested.hsave_msr;
+ msr_info->data = svm->nested.hsave_msr;
break;
case MSR_VM_CR:
- *data = svm->nested.vm_cr_msr;
+ msr_info->data = svm->nested.vm_cr_msr;
break;
case MSR_IA32_UCODE_REV:
- *data = 0x01000065;
+ msr_info->data = 0x01000065;
break;
default:
- return kvm_get_msr_common(vcpu, ecx, data);
+ return kvm_get_msr_common(vcpu, msr_info);
}
return 0;
}
@@ -3144,16 +3144,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
static int rdmsr_interception(struct vcpu_svm *svm)
{
u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
- u64 data;
+ struct msr_data msr_info;
- if (svm_get_msr(&svm->vcpu, ecx, &data)) {
+ msr_info.index = ecx;
+ msr_info.host_initiated = false;
+ if (svm_get_msr(&svm->vcpu, &msr_info)) {
trace_kvm_msr_read_ex(ecx);
kvm_inject_gp(&svm->vcpu, 0);
} else {
- trace_kvm_msr_read(ecx, data);
+ trace_kvm_msr_read(ecx, msr_info.data);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX,
+ msr_info.data & 0xffffffff);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
+ msr_info.data >> 32);
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
skip_emulated_instruction(&svm->vcpu);
}
@@ -3390,6 +3394,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_MWAIT] = mwait_interception,
[SVM_EXIT_XSETBV] = xsetbv_interception,
[SVM_EXIT_NPF] = pf_interception,
+ [SVM_EXIT_RSM] = emulate_on_interception,
};
static void dump_vmcb(struct kvm_vcpu *vcpu)
@@ -4075,6 +4080,11 @@ static bool svm_cpu_has_accelerated_tpr(void)
return false;
}
+static bool svm_has_high_real_mode_segbase(void)
+{
+ return true;
+}
+
static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
return 0;
@@ -4348,6 +4358,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable,
.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
+ .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,
.vcpu_create = svm_create_vcpu,
.vcpu_free = svm_free_vcpu,
@@ -4383,6 +4394,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
+ .fpu_activate = svm_fpu_activate,
.fpu_deactivate = svm_fpu_deactivate,
.tlb_flush = svm_flush_tlb,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 7c7bc8b..4eae7c3 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -952,6 +952,28 @@ TRACE_EVENT(kvm_wait_lapic_expire,
__entry->delta < 0 ? "early" : "late")
);
+TRACE_EVENT(kvm_enter_smm,
+ TP_PROTO(unsigned int vcpu_id, u64 smbase, bool entering),
+ TP_ARGS(vcpu_id, smbase, entering),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, vcpu_id )
+ __field( u64, smbase )
+ __field( bool, entering )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu_id;
+ __entry->smbase = smbase;
+ __entry->entering = entering;
+ ),
+
+ TP_printk("vcpu %u: %s SMM, smbase 0x%llx",
+ __entry->vcpu_id,
+ __entry->entering ? "entering" : "leaving",
+ __entry->smbase)
+);
+
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bcb61b0..06a186b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -786,7 +786,7 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
{
- struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT);
+ struct page *page = kvm_vcpu_gfn_to_page(vcpu, addr >> PAGE_SHIFT);
if (is_error_page(page))
return NULL;
@@ -2622,76 +2622,69 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
-static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
- u64 data;
struct shared_msr_entry *msr;
- if (!pdata) {
- printk(KERN_ERR "BUG: get_msr called with NULL pdata\n");
- return -EINVAL;
- }
-
- switch (msr_index) {
+ switch (msr_info->index) {
#ifdef CONFIG_X86_64
case MSR_FS_BASE:
- data = vmcs_readl(GUEST_FS_BASE);
+ msr_info->data = vmcs_readl(GUEST_FS_BASE);
break;
case MSR_GS_BASE:
- data = vmcs_readl(GUEST_GS_BASE);
+ msr_info->data = vmcs_readl(GUEST_GS_BASE);
break;
case MSR_KERNEL_GS_BASE:
vmx_load_host_state(to_vmx(vcpu));
- data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+ msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
break;
#endif
case MSR_EFER:
- return kvm_get_msr_common(vcpu, msr_index, pdata);
+ return kvm_get_msr_common(vcpu, msr_info);
case MSR_IA32_TSC:
- data = guest_read_tsc();
+ msr_info->data = guest_read_tsc();
break;
case MSR_IA32_SYSENTER_CS:
- data = vmcs_read32(GUEST_SYSENTER_CS);
+ msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
break;
case MSR_IA32_SYSENTER_EIP:
- data = vmcs_readl(GUEST_SYSENTER_EIP);
+ msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
break;
case MSR_IA32_SYSENTER_ESP:
- data = vmcs_readl(GUEST_SYSENTER_ESP);
+ msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
case MSR_IA32_BNDCFGS:
if (!vmx_mpx_supported())
return 1;
- data = vmcs_read64(GUEST_BNDCFGS);
+ msr_info->data = vmcs_read64(GUEST_BNDCFGS);
break;
case MSR_IA32_FEATURE_CONTROL:
if (!nested_vmx_allowed(vcpu))
return 1;
- data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
+ msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
break;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
if (!nested_vmx_allowed(vcpu))
return 1;
- return vmx_get_vmx_msr(vcpu, msr_index, pdata);
+ return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data);
case MSR_IA32_XSS:
if (!vmx_xsaves_supported())
return 1;
- data = vcpu->arch.ia32_xss;
+ msr_info->data = vcpu->arch.ia32_xss;
break;
case MSR_TSC_AUX:
if (!to_vmx(vcpu)->rdtscp_enabled)
return 1;
/* Otherwise falls through */
default:
- msr = find_msr_entry(to_vmx(vcpu), msr_index);
+ msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
if (msr) {
- data = msr->data;
+ msr_info->data = msr->data;
break;
}
- return kvm_get_msr_common(vcpu, msr_index, pdata);
+ return kvm_get_msr_common(vcpu, msr_info);
}
- *pdata = data;
return 0;
}
@@ -4122,7 +4115,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
kvm_userspace_mem.flags = 0;
kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
+ r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
if (r)
goto out;
@@ -4157,7 +4150,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
kvm_userspace_mem.guest_phys_addr =
kvm->arch.ept_identity_map_addr;
kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
+ r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
return r;
}
@@ -4963,7 +4956,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
.flags = 0,
};
- ret = kvm_set_memory_region(kvm, &tss_mem);
+ ret = x86_set_memory_region(kvm, &tss_mem);
if (ret)
return ret;
kvm->arch.tss_addr = addr;
@@ -5473,19 +5466,21 @@ static int handle_cpuid(struct kvm_vcpu *vcpu)
static int handle_rdmsr(struct kvm_vcpu *vcpu)
{
u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
- u64 data;
+ struct msr_data msr_info;
- if (vmx_get_msr(vcpu, ecx, &data)) {
+ msr_info.index = ecx;
+ msr_info.host_initiated = false;
+ if (vmx_get_msr(vcpu, &msr_info)) {
trace_kvm_msr_read_ex(ecx);
kvm_inject_gp(vcpu, 0);
return 1;
}
- trace_kvm_msr_read(ecx, data);
+ trace_kvm_msr_read(ecx, msr_info.data);
/* FIXME: handling of bits 32:63 of rax, rdx */
- vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
- vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u;
+ vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
+ vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
skip_emulated_instruction(vcpu);
return 1;
}
@@ -7328,7 +7323,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
bitmap += (port & 0x7fff) / 8;
if (last_bitmap != bitmap)
- if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1))
+ if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
return true;
if (b & (1 << (port & 7)))
return true;
@@ -7372,7 +7367,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
/* Then read the msr_index'th bit from this bitmap: */
if (msr_index < 1024*8) {
unsigned char b;
- if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1))
+ if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
return true;
return 1 & (b >> (msr_index & 7));
} else
@@ -7637,9 +7632,9 @@ static void vmx_disable_pml(struct vcpu_vmx *vmx)
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}
-static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
+static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
{
- struct kvm *kvm = vmx->vcpu.kvm;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 *pml_buf;
u16 pml_idx;
@@ -7661,7 +7656,7 @@ static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
gpa = pml_buf[pml_idx];
WARN_ON(gpa & (PAGE_SIZE - 1));
- mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
+ kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
}
/* reset PML index */
@@ -7856,7 +7851,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
* flushed already.
*/
if (enable_pml)
- vmx_flush_pml_buffer(vmx);
+ vmx_flush_pml_buffer(vcpu);
/* If guest state is invalid, start emulating */
if (vmx->emulation_required)
@@ -8144,6 +8139,11 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
local_irq_enable();
}
+static bool vmx_has_high_real_mode_segbase(void)
+{
+ return enable_unrestricted_guest || emulate_invalid_guest_state;
+}
+
static bool vmx_mpx_supported(void)
{
return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
@@ -9114,8 +9114,8 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
msr.host_initiated = false;
for (i = 0; i < count; i++) {
- if (kvm_read_guest(vcpu->kvm, gpa + i * sizeof(e),
- &e, sizeof(e))) {
+ if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
+ &e, sizeof(e))) {
pr_warn_ratelimited(
"%s cannot read MSR entry (%u, 0x%08llx)\n",
__func__, i, gpa + i * sizeof(e));
@@ -9147,9 +9147,10 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
struct vmx_msr_entry e;
for (i = 0; i < count; i++) {
- if (kvm_read_guest(vcpu->kvm,
- gpa + i * sizeof(e),
- &e, 2 * sizeof(u32))) {
+ struct msr_data msr_info;
+ if (kvm_vcpu_read_guest(vcpu,
+ gpa + i * sizeof(e),
+ &e, 2 * sizeof(u32))) {
pr_warn_ratelimited(
"%s cannot read MSR entry (%u, 0x%08llx)\n",
__func__, i, gpa + i * sizeof(e));
@@ -9161,19 +9162,21 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
__func__, i, e.index, e.reserved);
return -EINVAL;
}
- if (kvm_get_msr(vcpu, e.index, &e.value)) {
+ msr_info.host_initiated = false;
+ msr_info.index = e.index;
+ if (kvm_get_msr(vcpu, &msr_info)) {
pr_warn_ratelimited(
"%s cannot read MSR (%u, 0x%x)\n",
__func__, i, e.index);
return -EINVAL;
}
- if (kvm_write_guest(vcpu->kvm,
- gpa + i * sizeof(e) +
- offsetof(struct vmx_msr_entry, value),
- &e.value, sizeof(e.value))) {
+ if (kvm_vcpu_write_guest(vcpu,
+ gpa + i * sizeof(e) +
+ offsetof(struct vmx_msr_entry, value),
+ &msr_info.data, sizeof(msr_info.data))) {
pr_warn_ratelimited(
"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
- __func__, i, e.index, e.value);
+ __func__, i, e.index, msr_info.data);
return -EINVAL;
}
}
@@ -10298,6 +10301,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.hardware_enable = hardware_enable,
.hardware_disable = hardware_disable,
.cpu_has_accelerated_tpr = report_flexpriority,
+ .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
.vcpu_create = vmx_create_vcpu,
.vcpu_free = vmx_free_vcpu,
@@ -10333,6 +10337,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.cache_reg = vmx_cache_reg,
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
+ .fpu_activate = vmx_fpu_activate,
.fpu_deactivate = vmx_fpu_deactivate,
.tlb_flush = vmx_flush_tlb,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 457b908..43f0df7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -99,6 +99,9 @@ module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
unsigned int min_timer_period_us = 500;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
+static bool __read_mostly kvmclock_periodic_sync = true;
+module_param(kvmclock_periodic_sync, bool, S_IRUGO);
+
bool kvm_has_tsc_control;
EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
u32 kvm_max_guest_tsc_khz;
@@ -475,7 +478,7 @@ EXPORT_SYMBOL_GPL(kvm_require_dr);
/*
* This function will be used to read from the physical memory of the currently
- * running guest. The difference to kvm_read_guest_page is that this function
+ * running guest. The difference to kvm_vcpu_read_guest_page is that this function
* can read from guest physical or from the guest's guest physical memory.
*/
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
@@ -493,7 +496,7 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
real_gfn = gpa_to_gfn(real_gfn);
- return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len);
+ return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
@@ -922,17 +925,11 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
*
* This list is modified at module load time to reflect the
* capabilities of the host cpu. This capabilities test skips MSRs that are
- * kvm-specific. Those are put in the beginning of the list.
+ * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs
+ * may depend on host virtualization features rather than host cpu features.
*/
-#define KVM_SAVE_MSRS_BEGIN 12
static u32 msrs_to_save[] = {
- MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
- MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
- HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
- HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
- HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
- MSR_KVM_PV_EOI_EN,
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_STAR,
#ifdef CONFIG_X86_64
@@ -944,14 +941,24 @@ static u32 msrs_to_save[] = {
static unsigned num_msrs_to_save;
-static const u32 emulated_msrs[] = {
+static u32 emulated_msrs[] = {
+ MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+ MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
+ HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+ HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
+ HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
+ MSR_KVM_PV_EOI_EN,
+
MSR_IA32_TSC_ADJUST,
MSR_IA32_TSCDEADLINE,
MSR_IA32_MISC_ENABLE,
MSR_IA32_MCG_STATUS,
MSR_IA32_MCG_CTL,
+ MSR_IA32_SMBASE,
};
+static unsigned num_emulated_msrs;
+
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
if (efer & efer_reserved_bits)
@@ -1045,6 +1052,21 @@ EXPORT_SYMBOL_GPL(kvm_set_msr);
/*
* Adapt set_msr() to msr_io()'s calling convention
*/
+static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+{
+ struct msr_data msr;
+ int r;
+
+ msr.index = index;
+ msr.host_initiated = true;
+ r = kvm_get_msr(vcpu, &msr);
+ if (r)
+ return r;
+
+ *data = msr.data;
+ return 0;
+}
+
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
struct msr_data msr;
@@ -1697,6 +1719,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->pvclock_set_guest_stopped_request = false;
}
+ pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO;
+
/* If the host uses TSC clocksource, then it is stable */
if (use_master_clock)
pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
@@ -1767,6 +1791,9 @@ static void kvmclock_sync_fn(struct work_struct *work)
kvmclock_sync_work);
struct kvm *kvm = container_of(ka, struct kvm, arch);
+ if (!kvmclock_periodic_sync)
+ return;
+
schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
KVMCLOCK_SYNC_PERIOD);
@@ -2003,7 +2030,7 @@ static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
r = PTR_ERR(page);
goto out;
}
- if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
+ if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE))
goto out_free;
r = 0;
out_free:
@@ -2103,13 +2130,13 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
break;
}
gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
- addr = gfn_to_hva(vcpu->kvm, gfn);
+ addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
if (kvm_is_error_hva(addr))
return 1;
if (__clear_user((void __user *)addr, PAGE_SIZE))
return 1;
vcpu->arch.hv_vapic = data;
- mark_page_dirty(vcpu->kvm, gfn);
+ kvm_vcpu_mark_page_dirty(vcpu, gfn);
if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
return 1;
break;
@@ -2256,6 +2283,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_MISC_ENABLE:
vcpu->arch.ia32_misc_enable_msr = data;
break;
+ case MSR_IA32_SMBASE:
+ if (!msr_info->host_initiated)
+ return 1;
+ vcpu->arch.smbase = data;
+ break;
case MSR_KVM_WALL_CLOCK_NEW:
case MSR_KVM_WALL_CLOCK:
vcpu->kvm->arch.wall_clock = data;
@@ -2276,6 +2308,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
&vcpu->requests);
ka->boot_vcpu_runs_old_kvmclock = tmp;
+
+ ka->kvmclock_offset = -get_kernel_ns();
}
vcpu->arch.time = data;
@@ -2435,9 +2469,9 @@ EXPORT_SYMBOL_GPL(kvm_set_msr_common);
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
-int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
- return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
+ return kvm_x86_ops->get_msr(vcpu, msr);
}
EXPORT_SYMBOL_GPL(kvm_get_msr);
@@ -2574,11 +2608,11 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
return 0;
}
-int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
u64 data;
- switch (msr) {
+ switch (msr_info->index) {
case MSR_IA32_PLATFORM_ID:
case MSR_IA32_EBL_CR_POWERON:
case MSR_IA32_DEBUGCTLMSR:
@@ -2601,26 +2635,26 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_AMD64_NB_CFG:
case MSR_FAM10H_MMIO_CONF_BASE:
case MSR_AMD64_BU_CFG2:
- data = 0;
+ msr_info->data = 0;
break;
case MSR_P6_PERFCTR0:
case MSR_P6_PERFCTR1:
case MSR_P6_EVNTSEL0:
case MSR_P6_EVNTSEL1:
- if (kvm_pmu_msr(vcpu, msr))
- return kvm_pmu_get_msr(vcpu, msr, pdata);
- data = 0;
+ if (kvm_pmu_msr(vcpu, msr_info->index))
+ return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
+ msr_info->data = 0;
break;
case MSR_IA32_UCODE_REV:
- data = 0x100000000ULL;
+ msr_info->data = 0x100000000ULL;
break;
case MSR_MTRRcap:
- data = 0x500 | KVM_NR_VAR_MTRR;
+ msr_info->data = 0x500 | KVM_NR_VAR_MTRR;
break;
case 0x200 ... 0x2ff:
- return get_msr_mtrr(vcpu, msr, pdata);
+ return get_msr_mtrr(vcpu, msr_info->index, &msr_info->data);
case 0xcd: /* fsb frequency */
- data = 3;
+ msr_info->data = 3;
break;
/*
* MSR_EBC_FREQUENCY_ID
@@ -2634,48 +2668,53 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
* multiplying by zero otherwise.
*/
case MSR_EBC_FREQUENCY_ID:
- data = 1 << 24;
+ msr_info->data = 1 << 24;
break;
case MSR_IA32_APICBASE:
- data = kvm_get_apic_base(vcpu);
+ msr_info->data = kvm_get_apic_base(vcpu);
break;
case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
- return kvm_x2apic_msr_read(vcpu, msr, pdata);
+ return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
break;
case MSR_IA32_TSCDEADLINE:
- data = kvm_get_lapic_tscdeadline_msr(vcpu);
+ msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
break;
case MSR_IA32_TSC_ADJUST:
- data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
+ msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
break;
case MSR_IA32_MISC_ENABLE:
- data = vcpu->arch.ia32_misc_enable_msr;
+ msr_info->data = vcpu->arch.ia32_misc_enable_msr;
+ break;
+ case MSR_IA32_SMBASE:
+ if (!msr_info->host_initiated)
+ return 1;
+ msr_info->data = vcpu->arch.smbase;
break;
case MSR_IA32_PERF_STATUS:
/* TSC increment by tick */
- data = 1000ULL;
+ msr_info->data = 1000ULL;
/* CPU multiplier */
data |= (((uint64_t)4ULL) << 40);
break;
case MSR_EFER:
- data = vcpu->arch.efer;
+ msr_info->data = vcpu->arch.efer;
break;
case MSR_KVM_WALL_CLOCK:
case MSR_KVM_WALL_CLOCK_NEW:
- data = vcpu->kvm->arch.wall_clock;
+ msr_info->data = vcpu->kvm->arch.wall_clock;
break;
case MSR_KVM_SYSTEM_TIME:
case MSR_KVM_SYSTEM_TIME_NEW:
- data = vcpu->arch.time;
+ msr_info->data = vcpu->arch.time;
break;
case MSR_KVM_ASYNC_PF_EN:
- data = vcpu->arch.apf.msr_val;
+ msr_info->data = vcpu->arch.apf.msr_val;
break;
case MSR_KVM_STEAL_TIME:
- data = vcpu->arch.st.msr_val;
+ msr_info->data = vcpu->arch.st.msr_val;
break;
case MSR_KVM_PV_EOI_EN:
- data = vcpu->arch.pv_eoi.msr_val;
+ msr_info->data = vcpu->arch.pv_eoi.msr_val;
break;
case MSR_IA32_P5_MC_ADDR:
case MSR_IA32_P5_MC_TYPE:
@@ -2683,7 +2722,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
- return get_msr_mce(vcpu, msr, pdata);
+ return get_msr_mce(vcpu, msr_info->index, &msr_info->data);
case MSR_K7_CLK_CTL:
/*
* Provide expected ramp-up count for K7. All other
@@ -2694,17 +2733,17 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
* type 6, model 8 and higher from exploding due to
* the rdmsr failing.
*/
- data = 0x20000000;
+ msr_info->data = 0x20000000;
break;
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
- if (kvm_hv_msr_partition_wide(msr)) {
+ if (kvm_hv_msr_partition_wide(msr_info->index)) {
int r;
mutex_lock(&vcpu->kvm->lock);
- r = get_msr_hyperv_pw(vcpu, msr, pdata);
+ r = get_msr_hyperv_pw(vcpu, msr_info->index, &msr_info->data);
mutex_unlock(&vcpu->kvm->lock);
return r;
} else
- return get_msr_hyperv(vcpu, msr, pdata);
+ return get_msr_hyperv(vcpu, msr_info->index, &msr_info->data);
break;
case MSR_IA32_BBL_CR_CTL3:
/* This legacy MSR exists but isn't fully documented in current
@@ -2717,31 +2756,30 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
* L2 cache control register 3: 64GB range, 256KB size,
* enabled, latency 0x1, configured
*/
- data = 0xbe702111;
+ msr_info->data = 0xbe702111;
break;
case MSR_AMD64_OSVW_ID_LENGTH:
if (!guest_cpuid_has_osvw(vcpu))
return 1;
- data = vcpu->arch.osvw.length;
+ msr_info->data = vcpu->arch.osvw.length;
break;
case MSR_AMD64_OSVW_STATUS:
if (!guest_cpuid_has_osvw(vcpu))
return 1;
- data = vcpu->arch.osvw.status;
+ msr_info->data = vcpu->arch.osvw.status;
break;
default:
- if (kvm_pmu_msr(vcpu, msr))
- return kvm_pmu_get_msr(vcpu, msr, pdata);
+ if (kvm_pmu_msr(vcpu, msr_info->index))
+ return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
if (!ignore_msrs) {
- vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
+ vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index);
return 1;
} else {
- vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
- data = 0;
+ vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
+ msr_info->data = 0;
}
break;
}
- *pdata = data;
return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_msr_common);
@@ -2862,6 +2900,17 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#endif
r = 1;
break;
+ case KVM_CAP_X86_SMM:
+ /* SMBASE is usually relocated above 1M on modern chipsets,
+ * and SMM handlers might indeed rely on 4G segment limits,
+ * so do not report SMM to be available if real mode is
+ * emulated via vm86 mode. Still, do not go to great lengths
+ * to avoid userspace's usage of the feature, because it is a
+ * fringe case that is not enabled except via specific settings
+ * of the module parameters.
+ */
+ r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
+ break;
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
@@ -2918,7 +2967,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
goto out;
n = msr_list.nmsrs;
- msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
+ msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
goto out;
r = -E2BIG;
@@ -2930,7 +2979,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
goto out;
if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
&emulated_msrs,
- ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
+ num_emulated_msrs * sizeof(u32)))
goto out;
r = 0;
break;
@@ -3074,6 +3123,13 @@ static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
return 0;
}
+static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
+{
+ kvm_make_request(KVM_REQ_SMI, vcpu);
+
+ return 0;
+}
+
static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
struct kvm_tpr_access_ctl *tac)
{
@@ -3179,8 +3235,15 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
events->sipi_vector = 0; /* never valid when reporting to user space */
+ events->smi.smm = is_smm(vcpu);
+ events->smi.pending = vcpu->arch.smi_pending;
+ events->smi.smm_inside_nmi =
+ !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
+ events->smi.latched_init = kvm_lapic_latched_init(vcpu);
+
events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
- | KVM_VCPUEVENT_VALID_SHADOW);
+ | KVM_VCPUEVENT_VALID_SHADOW
+ | KVM_VCPUEVENT_VALID_SMM);
memset(&events->reserved, 0, sizeof(events->reserved));
}
@@ -3189,7 +3252,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
{
if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
| KVM_VCPUEVENT_VALID_SIPI_VECTOR
- | KVM_VCPUEVENT_VALID_SHADOW))
+ | KVM_VCPUEVENT_VALID_SHADOW
+ | KVM_VCPUEVENT_VALID_SMM))
return -EINVAL;
process_nmi(vcpu);
@@ -3214,6 +3278,24 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
kvm_vcpu_has_lapic(vcpu))
vcpu->arch.apic->sipi_vector = events->sipi_vector;
+ if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
+ if (events->smi.smm)
+ vcpu->arch.hflags |= HF_SMM_MASK;
+ else
+ vcpu->arch.hflags &= ~HF_SMM_MASK;
+ vcpu->arch.smi_pending = events->smi.pending;
+ if (events->smi.smm_inside_nmi)
+ vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
+ else
+ vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
+ if (kvm_vcpu_has_lapic(vcpu)) {
+ if (events->smi.latched_init)
+ set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+ else
+ clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+ }
+ }
+
kvm_make_request(KVM_REQ_EVENT, vcpu);
return 0;
@@ -3473,6 +3555,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_nmi(vcpu);
break;
}
+ case KVM_SMI: {
+ r = kvm_vcpu_ioctl_smi(vcpu);
+ break;
+ }
case KVM_SET_CPUID: {
struct kvm_cpuid __user *cpuid_arg = argp;
struct kvm_cpuid cpuid;
@@ -3512,7 +3598,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_GET_MSRS:
- r = msr_io(vcpu, argp, kvm_get_msr, 1);
+ r = msr_io(vcpu, argp, do_get_msr, 1);
break;
case KVM_SET_MSRS:
r = msr_io(vcpu, argp, do_set_msr, 0);
@@ -4196,8 +4282,7 @@ static void kvm_init_msr_list(void)
u32 dummy[2];
unsigned i, j;
- /* skip the first msrs in the list. KVM-specific */
- for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
+ for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
continue;
@@ -4222,6 +4307,22 @@ static void kvm_init_msr_list(void)
j++;
}
num_msrs_to_save = j;
+
+ for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
+ switch (emulated_msrs[i]) {
+ case MSR_IA32_SMBASE:
+ if (!kvm_x86_ops->cpu_has_high_real_mode_segbase())
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ if (j < i)
+ emulated_msrs[j] = emulated_msrs[i];
+ j++;
+ }
+ num_emulated_msrs = j;
}
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -4339,8 +4440,8 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
- ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, data,
- offset, toread);
+ ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
+ offset, toread);
if (ret < 0) {
r = X86EMUL_IO_NEEDED;
goto out;
@@ -4373,8 +4474,8 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
offset = addr & (PAGE_SIZE-1);
if (WARN_ON(offset + bytes > PAGE_SIZE))
bytes = (unsigned)PAGE_SIZE - offset;
- ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, val,
- offset, bytes);
+ ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
+ offset, bytes);
if (unlikely(ret < 0))
return X86EMUL_IO_NEEDED;
@@ -4420,7 +4521,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
- ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
+ ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
if (ret < 0) {
r = X86EMUL_IO_NEEDED;
goto out;
@@ -4473,7 +4574,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
{
int ret;
- ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
+ ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
if (ret < 0)
return 0;
kvm_mmu_pte_write(vcpu, gpa, val, bytes);
@@ -4507,7 +4608,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
- return !kvm_read_guest(vcpu->kvm, gpa, val, bytes);
+ return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
}
static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -4705,7 +4806,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
goto emul_write;
- page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+ page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
if (is_error_page(page))
goto emul_write;
@@ -4733,7 +4834,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
if (!exchanged)
return X86EMUL_CMPXCHG_FAILED;
- mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
+ kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
kvm_mmu_pte_write(vcpu, gpa, new, bytes);
return X86EMUL_CONTINUE;
@@ -5032,7 +5133,17 @@ static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
u32 msr_index, u64 *pdata)
{
- return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
+ struct msr_data msr;
+ int r;
+
+ msr.index = msr_index;
+ msr.host_initiated = false;
+ r = kvm_get_msr(emul_to_vcpu(ctxt), &msr);
+ if (r)
+ return r;
+
+ *pdata = msr.data;
+ return 0;
}
static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
@@ -5046,6 +5157,20 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
}
+static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
+{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
+ return vcpu->arch.smbase;
+}
+
+static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
+{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
+ vcpu->arch.smbase = smbase;
+}
+
static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
u32 pmc)
{
@@ -5131,6 +5256,8 @@ static const struct x86_emulate_ops emulate_ops = {
.cpl = emulator_get_cpl,
.get_dr = emulator_get_dr,
.set_dr = emulator_set_dr,
+ .get_smbase = emulator_get_smbase,
+ .set_smbase = emulator_set_smbase,
.set_msr = emulator_set_msr,
.get_msr = emulator_get_msr,
.check_pmc = emulator_check_pmc,
@@ -5192,7 +5319,10 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
(cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
cs_db ? X86EMUL_MODE_PROT32 :
X86EMUL_MODE_PROT16;
- ctxt->guest_mode = is_guest_mode(vcpu);
+ BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
+ BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
+ BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
+ ctxt->emul_flags = vcpu->arch.hflags;
init_decode_cache(ctxt);
vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
@@ -5361,6 +5491,34 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
static int complete_emulated_pio(struct kvm_vcpu *vcpu);
+static void kvm_smm_changed(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.hflags & HF_SMM_MASK)) {
+ /* This is a good place to trace that we are exiting SMM. */
+ trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
+
+ if (unlikely(vcpu->arch.smi_pending)) {
+ kvm_make_request(KVM_REQ_SMI, vcpu);
+ vcpu->arch.smi_pending = 0;
+ } else {
+ /* Process a latched INIT, if any. */
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ }
+ }
+
+ kvm_mmu_reset_context(vcpu);
+}
+
+static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
+{
+ unsigned changed = vcpu->arch.hflags ^ emul_flags;
+
+ vcpu->arch.hflags = emul_flags;
+
+ if (changed & HF_SMM_MASK)
+ kvm_smm_changed(vcpu);
+}
+
static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
unsigned long *db)
{
@@ -5560,6 +5718,8 @@ restart:
unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+ if (vcpu->arch.hflags != ctxt->emul_flags)
+ kvm_set_hflags(vcpu, ctxt->emul_flags);
kvm_rip_write(vcpu, ctxt->eip);
if (r == EMULATE_DONE)
kvm_vcpu_check_singlestep(vcpu, rflags, &r);
@@ -6126,6 +6286,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
struct kvm_run *kvm_run = vcpu->run;
kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+ kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
if (irqchip_in_kernel(vcpu->kvm))
@@ -6249,6 +6410,233 @@ static void process_nmi(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
+#define put_smstate(type, buf, offset, val) \
+ *(type *)((buf) + (offset) - 0x7e00) = val
+
+static u32 process_smi_get_segment_flags(struct kvm_segment *seg)
+{
+ u32 flags = 0;
+ flags |= seg->g << 23;
+ flags |= seg->db << 22;
+ flags |= seg->l << 21;
+ flags |= seg->avl << 20;
+ flags |= seg->present << 15;
+ flags |= seg->dpl << 13;
+ flags |= seg->s << 12;
+ flags |= seg->type << 8;
+ return flags;
+}
+
+static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
+{
+ struct kvm_segment seg;
+ int offset;
+
+ kvm_get_segment(vcpu, &seg, n);
+ put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
+
+ if (n < 3)
+ offset = 0x7f84 + n * 12;
+ else
+ offset = 0x7f2c + (n - 3) * 12;
+
+ put_smstate(u32, buf, offset + 8, seg.base);
+ put_smstate(u32, buf, offset + 4, seg.limit);
+ put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
+}
+
+static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+{
+ struct kvm_segment seg;
+ int offset;
+ u16 flags;
+
+ kvm_get_segment(vcpu, &seg, n);
+ offset = 0x7e00 + n * 16;
+
+ flags = process_smi_get_segment_flags(&seg) >> 8;
+ put_smstate(u16, buf, offset, seg.selector);
+ put_smstate(u16, buf, offset + 2, flags);
+ put_smstate(u32, buf, offset + 4, seg.limit);
+ put_smstate(u64, buf, offset + 8, seg.base);
+}
+
+static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
+{
+ struct desc_ptr dt;
+ struct kvm_segment seg;
+ unsigned long val;
+ int i;
+
+ put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
+ put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
+ put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
+ put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
+
+ for (i = 0; i < 8; i++)
+ put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
+
+ kvm_get_dr(vcpu, 6, &val);
+ put_smstate(u32, buf, 0x7fcc, (u32)val);
+ kvm_get_dr(vcpu, 7, &val);
+ put_smstate(u32, buf, 0x7fc8, (u32)val);
+
+ kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+ put_smstate(u32, buf, 0x7fc4, seg.selector);
+ put_smstate(u32, buf, 0x7f64, seg.base);
+ put_smstate(u32, buf, 0x7f60, seg.limit);
+ put_smstate(u32, buf, 0x7f5c, process_smi_get_segment_flags(&seg));
+
+ kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+ put_smstate(u32, buf, 0x7fc0, seg.selector);
+ put_smstate(u32, buf, 0x7f80, seg.base);
+ put_smstate(u32, buf, 0x7f7c, seg.limit);
+ put_smstate(u32, buf, 0x7f78, process_smi_get_segment_flags(&seg));
+
+ kvm_x86_ops->get_gdt(vcpu, &dt);
+ put_smstate(u32, buf, 0x7f74, dt.address);
+ put_smstate(u32, buf, 0x7f70, dt.size);
+
+ kvm_x86_ops->get_idt(vcpu, &dt);
+ put_smstate(u32, buf, 0x7f58, dt.address);
+ put_smstate(u32, buf, 0x7f54, dt.size);
+
+ for (i = 0; i < 6; i++)
+ process_smi_save_seg_32(vcpu, buf, i);
+
+ put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
+
+ /* revision id */
+ put_smstate(u32, buf, 0x7efc, 0x00020000);
+ put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
+}
+
+static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
+{
+#ifdef CONFIG_X86_64
+ struct desc_ptr dt;
+ struct kvm_segment seg;
+ unsigned long val;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
+
+ put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
+ put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
+
+ kvm_get_dr(vcpu, 6, &val);
+ put_smstate(u64, buf, 0x7f68, val);
+ kvm_get_dr(vcpu, 7, &val);
+ put_smstate(u64, buf, 0x7f60, val);
+
+ put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
+ put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
+ put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
+
+ put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
+
+ /* revision id */
+ put_smstate(u32, buf, 0x7efc, 0x00020064);
+
+ put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
+
+ kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+ put_smstate(u16, buf, 0x7e90, seg.selector);
+ put_smstate(u16, buf, 0x7e92, process_smi_get_segment_flags(&seg) >> 8);
+ put_smstate(u32, buf, 0x7e94, seg.limit);
+ put_smstate(u64, buf, 0x7e98, seg.base);
+
+ kvm_x86_ops->get_idt(vcpu, &dt);
+ put_smstate(u32, buf, 0x7e84, dt.size);
+ put_smstate(u64, buf, 0x7e88, dt.address);
+
+ kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+ put_smstate(u16, buf, 0x7e70, seg.selector);
+ put_smstate(u16, buf, 0x7e72, process_smi_get_segment_flags(&seg) >> 8);
+ put_smstate(u32, buf, 0x7e74, seg.limit);
+ put_smstate(u64, buf, 0x7e78, seg.base);
+
+ kvm_x86_ops->get_gdt(vcpu, &dt);
+ put_smstate(u32, buf, 0x7e64, dt.size);
+ put_smstate(u64, buf, 0x7e68, dt.address);
+
+ for (i = 0; i < 6; i++)
+ process_smi_save_seg_64(vcpu, buf, i);
+#else
+ WARN_ON_ONCE(1);
+#endif
+}
+
+static void process_smi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment cs, ds;
+ char buf[512];
+ u32 cr0;
+
+ if (is_smm(vcpu)) {
+ vcpu->arch.smi_pending = true;
+ return;
+ }
+
+ trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
+ vcpu->arch.hflags |= HF_SMM_MASK;
+ memset(buf, 0, 512);
+ if (guest_cpuid_has_longmode(vcpu))
+ process_smi_save_state_64(vcpu, buf);
+ else
+ process_smi_save_state_32(vcpu, buf);
+
+ kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
+
+ if (kvm_x86_ops->get_nmi_mask(vcpu))
+ vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
+ else
+ kvm_x86_ops->set_nmi_mask(vcpu, true);
+
+ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
+ kvm_rip_write(vcpu, 0x8000);
+
+ cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
+ kvm_x86_ops->set_cr0(vcpu, cr0);
+ vcpu->arch.cr0 = cr0;
+
+ kvm_x86_ops->set_cr4(vcpu, 0);
+
+ __kvm_set_dr(vcpu, 7, DR7_FIXED_1);
+
+ cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
+ cs.base = vcpu->arch.smbase;
+
+ ds.selector = 0;
+ ds.base = 0;
+
+ cs.limit = ds.limit = 0xffffffff;
+ cs.type = ds.type = 0x3;
+ cs.dpl = ds.dpl = 0;
+ cs.db = ds.db = 0;
+ cs.s = ds.s = 1;
+ cs.l = ds.l = 0;
+ cs.g = ds.g = 1;
+ cs.avl = ds.avl = 0;
+ cs.present = ds.present = 1;
+ cs.unusable = ds.unusable = 0;
+ cs.padding = ds.padding = 0;
+
+ kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
+
+ if (guest_cpuid_has_longmode(vcpu))
+ kvm_x86_ops->set_efer(vcpu, 0);
+
+ kvm_update_cpuid(vcpu);
+ kvm_mmu_reset_context(vcpu);
+}
+
static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
{
u64 eoi_exit_bitmap[4];
@@ -6282,6 +6670,8 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
return;
page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ if (is_error_page(page))
+ return;
kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
/*
@@ -6355,6 +6745,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
record_steal_time(vcpu);
+ if (kvm_check_request(KVM_REQ_SMI, vcpu))
+ process_smi(vcpu);
if (kvm_check_request(KVM_REQ_NMI, vcpu))
process_nmi(vcpu);
if (kvm_check_request(KVM_REQ_PMU, vcpu))
@@ -7155,7 +7547,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
* Every 255 times fpu_counter rolls over to 0; a guest that uses
* the FPU in bursts will revert to loading it on demand.
*/
- if (!use_eager_fpu()) {
+ if (!vcpu->arch.eager_fpu) {
if (++vcpu->fpu_counter < 5)
kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
}
@@ -7174,11 +7566,21 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
unsigned int id)
{
+ struct kvm_vcpu *vcpu;
+
if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
printk_once(KERN_WARNING
"kvm: SMP vm created on host with unstable TSC; "
"guest TSC will not be reliable\n");
- return kvm_x86_ops->vcpu_create(kvm, id);
+
+ vcpu = kvm_x86_ops->vcpu_create(kvm, id);
+
+ /*
+ * Activate fpu unconditionally in case the guest needs eager FPU. It will be
+ * deactivated soon if it doesn't.
+ */
+ kvm_x86_ops->fpu_activate(vcpu);
+ return vcpu;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
@@ -7209,6 +7611,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
kvm_write_tsc(vcpu, &msr);
vcpu_put(vcpu);
+ if (!kvmclock_periodic_sync)
+ return;
+
schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
KVMCLOCK_SYNC_PERIOD);
}
@@ -7229,6 +7634,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
+ vcpu->arch.hflags = 0;
+
atomic_set(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = 0;
vcpu->arch.nmi_injected = false;
@@ -7254,8 +7661,10 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.apf.halted = false;
- if (!init_event)
+ if (!init_event) {
kvm_pmu_reset(vcpu);
+ vcpu->arch.smbase = 0x30000;
+ }
memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
vcpu->arch.regs_avail = ~0;
@@ -7571,6 +7980,40 @@ void kvm_arch_sync_events(struct kvm *kvm)
kvm_free_pit(kvm);
}
+int __x86_set_memory_region(struct kvm *kvm,
+ const struct kvm_userspace_memory_region *mem)
+{
+ int i, r;
+
+ /* Called with kvm->slots_lock held. */
+ BUG_ON(mem->slot >= KVM_MEM_SLOTS_NUM);
+
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ struct kvm_userspace_memory_region m = *mem;
+
+ m.slot |= i << 16;
+ r = __kvm_set_memory_region(kvm, &m);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__x86_set_memory_region);
+
+int x86_set_memory_region(struct kvm *kvm,
+ const struct kvm_userspace_memory_region *mem)
+{
+ int r;
+
+ mutex_lock(&kvm->slots_lock);
+ r = __x86_set_memory_region(kvm, mem);
+ mutex_unlock(&kvm->slots_lock);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(x86_set_memory_region);
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
if (current->mm == kvm->mm) {
@@ -7582,13 +8025,13 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
struct kvm_userspace_memory_region mem;
memset(&mem, 0, sizeof(mem));
mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
- kvm_set_memory_region(kvm, &mem);
+ x86_set_memory_region(kvm, &mem);
mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
- kvm_set_memory_region(kvm, &mem);
+ x86_set_memory_region(kvm, &mem);
mem.slot = TSS_PRIVATE_MEMSLOT;
- kvm_set_memory_region(kvm, &mem);
+ x86_set_memory_region(kvm, &mem);
}
kvm_iommu_unmap_guest(kvm);
kfree(kvm->arch.vpic);
@@ -7677,18 +8120,18 @@ out_free:
return -ENOMEM;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
+void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
{
/*
* memslots->generation has been incremented.
* mmio generation may have reached its maximum value.
*/
- kvm_mmu_invalidate_mmio_sptes(kvm);
+ kvm_mmu_invalidate_mmio_sptes(kvm, slots);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
/*
@@ -7766,14 +8209,14 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- struct kvm_memory_slot *new;
int nr_mmu_pages = 0;
- if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
+ if (change == KVM_MR_DELETE && old->id >= KVM_USER_MEM_SLOTS) {
int ret;
ret = vm_munmap(old->userspace_addr,
@@ -7790,9 +8233,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
if (nr_mmu_pages)
kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
- /* It's OK to get 'new' slot here as it has already been installed */
- new = id_to_memslot(kvm->memslots, mem->slot);
-
/*
* Dirty logging tracks sptes in 4k granularity, meaning that large
* sptes have to be split. If live migration is successful, the guest
@@ -7817,9 +8257,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
* been zapped so no dirty logging staff is needed for old slot. For
* KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the
* new and it's also covered when dealing with the new slot.
+ *
+ * FIXME: const-ify all uses of struct kvm_memory_slot.
*/
if (change != KVM_MR_DELETE)
- kvm_mmu_slot_apply_flags(kvm, new);
+ kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new);
}
void kvm_arch_flush_shadow_all(struct kvm *kvm)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 87fd74a..9564fd7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -44,6 +44,10 @@
/* Two fragments for cross MMIO pages. */
#define KVM_MAX_MMIO_FRAGMENTS 2
+#ifndef KVM_ADDRESS_SPACE_NUM
+#define KVM_ADDRESS_SPACE_NUM 1
+#endif
+
/*
* For the normal pfn, the highest 12 bits should be zero,
* so we can mask bit 62 ~ bit 52 to indicate the error pfn,
@@ -134,6 +138,7 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_ENABLE_IBS 23
#define KVM_REQ_DISABLE_IBS 24
#define KVM_REQ_APIC_PAGE_RELOAD 25
+#define KVM_REQ_SMI 26
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
@@ -330,6 +335,13 @@ struct kvm_kernel_irq_routing_entry {
#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
#endif
+#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+#endif
+
/*
* Note:
* memslots are not sorted by id anymore, please use id_to_memslot()
@@ -348,7 +360,7 @@ struct kvm {
spinlock_t mmu_lock;
struct mutex slots_lock;
struct mm_struct *mm; /* userspace tied to this vm */
- struct kvm_memslots *memslots;
+ struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
struct srcu_struct srcu;
struct srcu_struct irq_srcu;
#ifdef CONFIG_KVM_APIC_ARCHITECTURE
@@ -463,13 +475,25 @@ void kvm_exit(void);
void kvm_get_kvm(struct kvm *kvm);
void kvm_put_kvm(struct kvm *kvm);
-static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
+static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
{
- return rcu_dereference_check(kvm->memslots,
+ return rcu_dereference_check(kvm->memslots[as_id],
srcu_read_lock_held(&kvm->srcu)
|| lockdep_is_held(&kvm->slots_lock));
}
+static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
+{
+ return __kvm_memslots(kvm, 0);
+}
+
+static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
+{
+ int as_id = kvm_arch_vcpu_memslots_id(vcpu);
+
+ return __kvm_memslots(vcpu->kvm, as_id);
+}
+
static inline struct kvm_memory_slot *
id_to_memslot(struct kvm_memslots *slots, int id)
{
@@ -501,21 +525,22 @@ enum kvm_mr_change {
};
int kvm_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem);
+ const struct kvm_userspace_memory_region *mem);
int __kvm_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem);
+ const struct kvm_userspace_memory_region *mem);
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont);
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned long npages);
-void kvm_arch_memslots_updated(struct kvm *kvm);
+void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change);
void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
enum kvm_mr_change change);
bool kvm_largepages_enabled(void);
void kvm_disable_largepages(void);
@@ -525,8 +550,8 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm);
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot);
-int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
- int nr_pages);
+int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
+ struct page **pages, int nr_pages);
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
@@ -574,6 +599,25 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
+struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
+struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
+pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
+pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
+unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
+unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
+int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
+ int len);
+int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data,
+ unsigned long len);
+int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data,
+ unsigned long len);
+int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, const void *data,
+ int offset, int len);
+int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
+ unsigned long len);
+void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
+
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
int kvm_vcpu_yield_to(struct kvm_vcpu *target);
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 931da7e..1b47a18 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -28,6 +28,7 @@ struct kvm_run;
struct kvm_userspace_memory_region;
struct kvm_vcpu;
struct kvm_vcpu_init;
+struct kvm_memslots;
enum kvm_mr_change;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cbfd1ac..716ad4a 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -202,7 +202,7 @@ struct kvm_run {
__u32 exit_reason;
__u8 ready_for_interrupt_injection;
__u8 if_flag;
- __u8 padding2[2];
+ __u16 flags;
/* in (pre_kvm_run), out (post_kvm_run) */
__u64 cr8;
@@ -815,6 +815,8 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_S390_IRQ_STATE 114
#define KVM_CAP_PPC_HWRNG 115
#define KVM_CAP_DISABLE_QUIRKS 116
+#define KVM_CAP_X86_SMM 117
+#define KVM_CAP_MULTI_ADDRESS_SPACE 118
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1200,6 +1202,8 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_S390_IRQ_STATE */
#define KVM_S390_SET_IRQ_STATE _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state)
#define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
+/* Available with KVM_CAP_X86_SMM */
+#define KVM_SMI _IO(KVMIO, 0xb7)
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bd3c08a..848af90 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -103,8 +103,7 @@ static void hardware_disable_all(void);
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
static void kvm_release_pfn_dirty(pfn_t pfn);
-static void mark_page_dirty_in_slot(struct kvm *kvm,
- struct kvm_memory_slot *memslot, gfn_t gfn);
+static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
__visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
@@ -440,13 +439,60 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
-static void kvm_init_memslots_id(struct kvm *kvm)
+static struct kvm_memslots *kvm_alloc_memslots(void)
{
int i;
- struct kvm_memslots *slots = kvm->memslots;
+ struct kvm_memslots *slots;
+ slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
+ if (!slots)
+ return NULL;
+
+ /*
+ * Init kvm generation close to the maximum to easily test the
+ * code of handling generation number wrap-around.
+ */
+ slots->generation = -150;
for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
slots->id_to_index[i] = slots->memslots[i].id = i;
+
+ return slots;
+}
+
+static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
+{
+ if (!memslot->dirty_bitmap)
+ return;
+
+ kvfree(memslot->dirty_bitmap);
+ memslot->dirty_bitmap = NULL;
+}
+
+/*
+ * Free any memory in @free but not in @dont.
+ */
+static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+ if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
+ kvm_destroy_dirty_bitmap(free);
+
+ kvm_arch_free_memslot(kvm, free, dont);
+
+ free->npages = 0;
+}
+
+static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
+{
+ struct kvm_memory_slot *memslot;
+
+ if (!slots)
+ return;
+
+ kvm_for_each_memslot(memslot, slots)
+ kvm_free_memslot(kvm, memslot, NULL);
+
+ kvfree(slots);
}
static struct kvm *kvm_create_vm(unsigned long type)
@@ -472,17 +518,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
r = -ENOMEM;
- kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots));
- if (!kvm->memslots)
- goto out_err_no_srcu;
-
- /*
- * Init kvm generation close to the maximum to easily test the
- * code of handling generation number wrap-around.
- */
- kvm->memslots->generation = -150;
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ kvm->memslots[i] = kvm_alloc_memslots();
+ if (!kvm->memslots[i])
+ goto out_err_no_srcu;
+ }
- kvm_init_memslots_id(kvm);
if (init_srcu_struct(&kvm->srcu))
goto out_err_no_srcu;
if (init_srcu_struct(&kvm->irq_srcu))
@@ -523,7 +564,8 @@ out_err_no_srcu:
out_err_no_disable:
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm->buses[i]);
- kvfree(kvm->memslots);
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+ kvm_free_memslots(kvm, kvm->memslots[i]);
kvm_arch_free_vm(kvm);
return ERR_PTR(r);
}
@@ -540,40 +582,6 @@ void *kvm_kvzalloc(unsigned long size)
return kzalloc(size, GFP_KERNEL);
}
-static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
-{
- if (!memslot->dirty_bitmap)
- return;
-
- kvfree(memslot->dirty_bitmap);
- memslot->dirty_bitmap = NULL;
-}
-
-/*
- * Free any memory in @free but not in @dont.
- */
-static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
- if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
- kvm_destroy_dirty_bitmap(free);
-
- kvm_arch_free_memslot(kvm, free, dont);
-
- free->npages = 0;
-}
-
-static void kvm_free_physmem(struct kvm *kvm)
-{
- struct kvm_memslots *slots = kvm->memslots;
- struct kvm_memory_slot *memslot;
-
- kvm_for_each_memslot(memslot, slots)
- kvm_free_physmem_slot(kvm, memslot, NULL);
-
- kvfree(kvm->memslots);
-}
-
static void kvm_destroy_devices(struct kvm *kvm)
{
struct list_head *node, *tmp;
@@ -607,7 +615,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
#endif
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
- kvm_free_physmem(kvm);
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+ kvm_free_memslots(kvm, kvm->memslots[i]);
cleanup_srcu_struct(&kvm->irq_srcu);
cleanup_srcu_struct(&kvm->srcu);
kvm_arch_free_vm(kvm);
@@ -670,8 +679,6 @@ static void update_memslots(struct kvm_memslots *slots,
WARN_ON(mslots[i].id != id);
if (!new->npages) {
WARN_ON(!mslots[i].npages);
- new->base_gfn = 0;
- new->flags = 0;
if (mslots[i].npages)
slots->used_slots--;
} else {
@@ -711,7 +718,7 @@ static void update_memslots(struct kvm_memslots *slots,
slots->id_to_index[mslots[i].id] = i;
}
-static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
+static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem)
{
u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
@@ -726,9 +733,9 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
}
static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
- struct kvm_memslots *slots)
+ int as_id, struct kvm_memslots *slots)
{
- struct kvm_memslots *old_memslots = kvm->memslots;
+ struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id);
/*
* Set the low bit in the generation, which disables SPTE caching
@@ -737,7 +744,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
WARN_ON(old_memslots->generation & 1);
slots->generation = old_memslots->generation + 1;
- rcu_assign_pointer(kvm->memslots, slots);
+ rcu_assign_pointer(kvm->memslots[as_id], slots);
synchronize_srcu_expedited(&kvm->srcu);
/*
@@ -747,7 +754,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
*/
slots->generation++;
- kvm_arch_memslots_updated(kvm);
+ kvm_arch_memslots_updated(kvm, slots);
return old_memslots;
}
@@ -761,7 +768,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
* Must be called holding kvm->slots_lock for write.
*/
int __kvm_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+ const struct kvm_userspace_memory_region *mem)
{
int r;
gfn_t base_gfn;
@@ -769,6 +776,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_memory_slot *slot;
struct kvm_memory_slot old, new;
struct kvm_memslots *slots = NULL, *old_memslots;
+ int as_id, id;
enum kvm_mr_change change;
r = check_memory_region_flags(mem);
@@ -776,36 +784,36 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out;
r = -EINVAL;
+ as_id = mem->slot >> 16;
+ id = (u16)mem->slot;
+
/* General sanity checks */
if (mem->memory_size & (PAGE_SIZE - 1))
goto out;
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
goto out;
/* We can read the guest memory with __xxx_user() later on. */
- if ((mem->slot < KVM_USER_MEM_SLOTS) &&
+ if ((id < KVM_USER_MEM_SLOTS) &&
((mem->userspace_addr & (PAGE_SIZE - 1)) ||
!access_ok(VERIFY_WRITE,
(void __user *)(unsigned long)mem->userspace_addr,
mem->memory_size)))
goto out;
- if (mem->slot >= KVM_MEM_SLOTS_NUM)
+ if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
goto out;
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
goto out;
- slot = id_to_memslot(kvm->memslots, mem->slot);
+ slot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
if (npages > KVM_MEM_MAX_NR_PAGES)
goto out;
- if (!npages)
- mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
-
new = old = *slot;
- new.id = mem->slot;
+ new.id = id;
new.base_gfn = base_gfn;
new.npages = npages;
new.flags = mem->flags;
@@ -828,17 +836,21 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out;
}
}
- } else if (old.npages) {
+ } else {
+ if (!old.npages)
+ goto out;
+
change = KVM_MR_DELETE;
- } else /* Modify a non-existent slot: disallowed. */
- goto out;
+ new.base_gfn = 0;
+ new.flags = 0;
+ }
if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
/* Check for overlaps */
r = -EEXIST;
- kvm_for_each_memslot(slot, kvm->memslots) {
+ kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) {
if ((slot->id >= KVM_USER_MEM_SLOTS) ||
- (slot->id == mem->slot))
+ (slot->id == id))
continue;
if (!((base_gfn + npages <= slot->base_gfn) ||
(base_gfn >= slot->base_gfn + slot->npages)))
@@ -867,13 +879,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
if (!slots)
goto out_free;
- memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+ memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots));
if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
- slot = id_to_memslot(slots, mem->slot);
+ slot = id_to_memslot(slots, id);
slot->flags |= KVM_MEMSLOT_INVALID;
- old_memslots = install_new_memslots(kvm, slots);
+ old_memslots = install_new_memslots(kvm, as_id, slots);
/* slot was deleted or moved, clear iommu mapping */
kvm_iommu_unmap_pages(kvm, &old);
@@ -898,18 +910,18 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (r)
goto out_slots;
- /* actual memory is freed via old in kvm_free_physmem_slot below */
+ /* actual memory is freed via old in kvm_free_memslot below */
if (change == KVM_MR_DELETE) {
new.dirty_bitmap = NULL;
memset(&new.arch, 0, sizeof(new.arch));
}
update_memslots(slots, &new);
- old_memslots = install_new_memslots(kvm, slots);
+ old_memslots = install_new_memslots(kvm, as_id, slots);
- kvm_arch_commit_memory_region(kvm, mem, &old, change);
+ kvm_arch_commit_memory_region(kvm, mem, &old, &new, change);
- kvm_free_physmem_slot(kvm, &old, &new);
+ kvm_free_memslot(kvm, &old, &new);
kvfree(old_memslots);
/*
@@ -931,14 +943,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
out_slots:
kvfree(slots);
out_free:
- kvm_free_physmem_slot(kvm, &new, &old);
+ kvm_free_memslot(kvm, &new, &old);
out:
return r;
}
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
int kvm_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+ const struct kvm_userspace_memory_region *mem)
{
int r;
@@ -952,24 +964,29 @@ EXPORT_SYMBOL_GPL(kvm_set_memory_region);
static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
{
- if (mem->slot >= KVM_USER_MEM_SLOTS)
+ if ((u16)mem->slot >= KVM_USER_MEM_SLOTS)
return -EINVAL;
+
return kvm_set_memory_region(kvm, mem);
}
int kvm_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log, int *is_dirty)
{
+ struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int r, i;
+ int r, i, as_id, id;
unsigned long n;
unsigned long any = 0;
r = -EINVAL;
- if (log->slot >= KVM_USER_MEM_SLOTS)
+ as_id = log->slot >> 16;
+ id = (u16)log->slot;
+ if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
goto out;
- memslot = id_to_memslot(kvm->memslots, log->slot);
+ slots = __kvm_memslots(kvm, as_id);
+ memslot = id_to_memslot(slots, id);
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
@@ -1018,17 +1035,21 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
int kvm_get_dirty_log_protect(struct kvm *kvm,
struct kvm_dirty_log *log, bool *is_dirty)
{
+ struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int r, i;
+ int r, i, as_id, id;
unsigned long n;
unsigned long *dirty_bitmap;
unsigned long *dirty_bitmap_buffer;
r = -EINVAL;
- if (log->slot >= KVM_USER_MEM_SLOTS)
+ as_id = log->slot >> 16;
+ id = (u16)log->slot;
+ if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
goto out;
- memslot = id_to_memslot(kvm->memslots, log->slot);
+ slots = __kvm_memslots(kvm, as_id);
+ memslot = id_to_memslot(slots, id);
dirty_bitmap = memslot->dirty_bitmap;
r = -ENOENT;
@@ -1091,6 +1112,11 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_memslot);
+struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn);
+}
+
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
@@ -1166,6 +1192,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_hva);
+unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ return gfn_to_hva_many(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, NULL);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva);
+
/*
* If writable is set to false, the hva returned by this function is only
* allowed to be read.
@@ -1188,6 +1220,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
return gfn_to_hva_memslot_prot(slot, gfn, writable);
}
+unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable)
+{
+ struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+
+ return gfn_to_hva_memslot_prot(slot, gfn, writable);
+}
+
static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int write, struct page **page)
{
@@ -1377,54 +1416,57 @@ pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
}
EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot);
-static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic,
- bool write_fault, bool *writable)
+pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
+ bool *writable)
{
- struct kvm_memory_slot *slot;
-
- slot = gfn_to_memslot(kvm, gfn);
+ return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL,
+ write_fault, writable);
+}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
- return __gfn_to_pfn_memslot(slot, gfn, atomic, NULL, write_fault,
- writable);
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+ return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);
-pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
+pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
{
- return __gfn_to_pfn(kvm, gfn, true, true, NULL);
+ return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
+EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
{
- return __gfn_to_pfn(kvm, gfn, false, true, NULL);
+ return gfn_to_pfn_memslot_atomic(gfn_to_memslot(kvm, gfn), gfn);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn);
+EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
-pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
- bool *writable)
+pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn)
{
- return __gfn_to_pfn(kvm, gfn, false, write_fault, writable);
+ return gfn_to_pfn_memslot_atomic(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
+EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn_atomic);
-pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
{
- return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
+ return gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn);
}
+EXPORT_SYMBOL_GPL(gfn_to_pfn);
-pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
+pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
- return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
+ return gfn_to_pfn_memslot(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
+EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn);
-int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
- int nr_pages)
+int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
+ struct page **pages, int nr_pages)
{
unsigned long addr;
gfn_t entry;
- addr = gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, &entry);
+ addr = gfn_to_hva_many(slot, gfn, &entry);
if (kvm_is_error_hva(addr))
return -1;
@@ -1458,6 +1500,16 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_page);
+struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ pfn_t pfn;
+
+ pfn = kvm_vcpu_gfn_to_pfn(vcpu, gfn);
+
+ return kvm_pfn_to_page(pfn);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_page);
+
void kvm_release_page_clean(struct page *page)
{
WARN_ON(is_error_page(page));
@@ -1520,13 +1572,13 @@ static int next_segment(unsigned long len, int offset)
return len;
}
-int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
- int len)
+static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
+ void *data, int offset, int len)
{
int r;
unsigned long addr;
- addr = gfn_to_hva_prot(kvm, gfn, NULL);
+ addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
r = __copy_from_user(data, (void __user *)addr + offset, len);
@@ -1534,8 +1586,25 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
return -EFAULT;
return 0;
}
+
+int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
+ int len)
+{
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+
+ return __kvm_read_guest_page(slot, gfn, data, offset, len);
+}
EXPORT_SYMBOL_GPL(kvm_read_guest_page);
+int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data,
+ int offset, int len)
+{
+ struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+
+ return __kvm_read_guest_page(slot, gfn, data, offset, len);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page);
+
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
@@ -1556,15 +1625,33 @@ int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
}
EXPORT_SYMBOL_GPL(kvm_read_guest);
-int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
- unsigned long len)
+int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len)
{
- int r;
- unsigned long addr;
gfn_t gfn = gpa >> PAGE_SHIFT;
+ int seg;
int offset = offset_in_page(gpa);
+ int ret;
+
+ while ((seg = next_segment(len, offset)) != 0) {
+ ret = kvm_vcpu_read_guest_page(vcpu, gfn, data, offset, seg);
+ if (ret < 0)
+ return ret;
+ offset = 0;
+ len -= seg;
+ data += seg;
+ ++gfn;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest);
- addr = gfn_to_hva_prot(kvm, gfn, NULL);
+static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
+ void *data, int offset, unsigned long len)
+{
+ int r;
+ unsigned long addr;
+
+ addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
pagefault_disable();
@@ -1574,27 +1661,63 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
return -EFAULT;
return 0;
}
-EXPORT_SYMBOL(kvm_read_guest_atomic);
-int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
- int offset, int len)
+int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
+ unsigned long len)
+{
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+ int offset = offset_in_page(gpa);
+
+ return __kvm_read_guest_atomic(slot, gfn, data, offset, len);
+}
+EXPORT_SYMBOL_GPL(kvm_read_guest_atomic);
+
+int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
+ void *data, unsigned long len)
+{
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ int offset = offset_in_page(gpa);
+
+ return __kvm_read_guest_atomic(slot, gfn, data, offset, len);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
+
+static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
+ const void *data, int offset, int len)
{
int r;
- struct kvm_memory_slot *memslot;
unsigned long addr;
- memslot = gfn_to_memslot(kvm, gfn);
addr = gfn_to_hva_memslot(memslot, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
r = __copy_to_user((void __user *)addr + offset, data, len);
if (r)
return -EFAULT;
- mark_page_dirty_in_slot(kvm, memslot, gfn);
+ mark_page_dirty_in_slot(memslot, gfn);
return 0;
}
+
+int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
+ const void *data, int offset, int len)
+{
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+
+ return __kvm_write_guest_page(slot, gfn, data, offset, len);
+}
EXPORT_SYMBOL_GPL(kvm_write_guest_page);
+int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
+ const void *data, int offset, int len)
+{
+ struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+
+ return __kvm_write_guest_page(slot, gfn, data, offset, len);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
+
int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
unsigned long len)
{
@@ -1616,6 +1739,27 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
}
EXPORT_SYMBOL_GPL(kvm_write_guest);
+int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
+ unsigned long len)
+{
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ int seg;
+ int offset = offset_in_page(gpa);
+ int ret;
+
+ while ((seg = next_segment(len, offset)) != 0) {
+ ret = kvm_vcpu_write_guest_page(vcpu, gfn, data, offset, seg);
+ if (ret < 0)
+ return ret;
+ offset = 0;
+ len -= seg;
+ data += seg;
+ ++gfn;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest);
+
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
gpa_t gpa, unsigned long len)
{
@@ -1673,7 +1817,7 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
r = __copy_to_user((void __user *)ghc->hva, data, len);
if (r)
return -EFAULT;
- mark_page_dirty_in_slot(kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+ mark_page_dirty_in_slot(ghc->memslot, ghc->gpa >> PAGE_SHIFT);
return 0;
}
@@ -1731,8 +1875,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
}
EXPORT_SYMBOL_GPL(kvm_clear_guest);
-static void mark_page_dirty_in_slot(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
+static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot,
gfn_t gfn)
{
if (memslot && memslot->dirty_bitmap) {
@@ -1747,10 +1890,19 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
struct kvm_memory_slot *memslot;
memslot = gfn_to_memslot(kvm, gfn);
- mark_page_dirty_in_slot(kvm, memslot, gfn);
+ mark_page_dirty_in_slot(memslot, gfn);
}
EXPORT_SYMBOL_GPL(mark_page_dirty);
+void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ struct kvm_memory_slot *memslot;
+
+ memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ mark_page_dirty_in_slot(memslot, gfn);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
+
static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
{
if (kvm_arch_vcpu_runnable(vcpu)) {
@@ -2480,6 +2632,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_IRQ_ROUTING:
return KVM_MAX_IRQ_ROUTES;
#endif
+#if KVM_ADDRESS_SPACE_NUM > 1
+ case KVM_CAP_MULTI_ADDRESS_SPACE:
+ return KVM_ADDRESS_SPACE_NUM;
+#endif
default:
break;
}
OpenPOWER on IntegriCloud