Diffstat (limited to 'sys/amd64/vmm/vmm.c')
-rw-r--r--  sys/amd64/vmm/vmm.c  234
1 file changed, 207 insertions(+), 27 deletions(-)
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index f471218b..2c86068 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -124,19 +124,25 @@ struct vm {
* An active vcpu is one that has been started implicitly (BSP) or
* explicitly (AP) by sending it a startup ipi.
*/
- cpuset_t active_cpus;
+ volatile cpuset_t active_cpus;
+
+ struct mtx rendezvous_mtx;
+ cpuset_t rendezvous_req_cpus;
+ cpuset_t rendezvous_done_cpus;
+ void *rendezvous_arg;
+ vm_rendezvous_func_t rendezvous_func;
};
static int vmm_initialized;
static struct vmm_ops *ops;
-#define VMM_INIT() (ops != NULL ? (*ops->init)() : 0)
+#define VMM_INIT(num) (ops != NULL ? (*ops->init)(num) : 0)
#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0)
#define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0)
#define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
-#define VMRUN(vmi, vcpu, rip, pmap) \
- (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap) : ENXIO)
+#define VMRUN(vmi, vcpu, rip, pmap, rptr) \
+ (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr) : ENXIO)
#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define VMSPACE_ALLOC(min, max) \
(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
@@ -156,6 +162,10 @@ static struct vmm_ops *ops;
(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define VMSETCAP(vmi, vcpu, num, val) \
(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
+#define VLAPIC_INIT(vmi, vcpu) \
+ (ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
+#define VLAPIC_CLEANUP(vmi, vlapic) \
+ (ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS)
#define fpu_stop_emulating() clts()
@@ -166,10 +176,20 @@ CTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */
/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
+SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
+
+static int vmm_ipinum;
+SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
+ "IPI vector used for vcpu notifications");
+
+static void vm_deactivate_cpu(struct vm *vm, int vcpuid);
+
static void
-vcpu_cleanup(struct vcpu *vcpu)
+vcpu_cleanup(struct vm *vm, int i)
{
- vlapic_cleanup(vcpu->vlapic);
+ struct vcpu *vcpu = &vm->vcpu[i];
+
+ VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
vmm_stat_free(vcpu->stats);
fpu_save_area_free(vcpu->guestfpu);
}
@@ -184,7 +204,7 @@ vcpu_init(struct vm *vm, uint32_t vcpu_id)
vcpu_lock_init(vcpu);
vcpu->hostcpu = NOCPU;
vcpu->vcpuid = vcpu_id;
- vcpu->vlapic = vlapic_init(vm, vcpu_id);
+ vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
vcpu->guestfpu = fpu_save_area_alloc();
fpu_save_area_reset(vcpu->guestfpu);
@@ -216,7 +236,10 @@ vmm_init(void)
int error;
vmm_host_state_init();
- vmm_ipi_init();
+
+ vmm_ipinum = vmm_ipi_alloc();
+ if (vmm_ipinum == 0)
+ vmm_ipinum = IPI_AST;
error = vmm_mem_init();
if (error)
@@ -232,7 +255,7 @@ vmm_init(void)
vmm_msr_init();
vmm_resume_p = vmm_resume;
- return (VMM_INIT());
+ return (VMM_INIT(vmm_ipinum));
}
static int
@@ -253,7 +276,8 @@ vmm_handler(module_t mod, int what, void *arg)
if (error == 0) {
vmm_resume_p = NULL;
iommu_cleanup();
- vmm_ipi_cleanup();
+ if (vmm_ipinum != IPI_AST)
+ vmm_ipi_free(vmm_ipinum);
error = VMM_CLEANUP();
/*
* Something bad happened - prevent new
@@ -288,8 +312,6 @@ static moduledata_t vmm_kmod = {
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);
-SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
-
int
vm_create(const char *name, struct vm **retvm)
{
@@ -315,6 +337,8 @@ vm_create(const char *name, struct vm **retvm)
vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
strcpy(vm->name, name);
+ vm->vmspace = vmspace;
+ mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
vm->vioapic = vioapic_init(vm);
vm->vhpet = vhpet_init(vm);
@@ -325,7 +349,6 @@ vm_create(const char *name, struct vm **retvm)
}
vm_activate_cpu(vm, BSP);
- vm->vmspace = vmspace;
*retvm = vm;
return (0);
@@ -360,7 +383,7 @@ vm_destroy(struct vm *vm)
vm->num_mem_segs = 0;
for (i = 0; i < VM_MAXCPU; i++)
- vcpu_cleanup(&vm->vcpu[i]);
+ vcpu_cleanup(vm, i);
VMSPACE_FREE(vm->vmspace);
@@ -866,6 +889,63 @@ vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
panic("Error %d setting state to %d", error, newstate);
}
+static void
+vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
+{
+
+ KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));
+
+ /*
+ * Update 'rendezvous_func' and execute a write memory barrier to
+ * ensure that it is visible across all host cpus. This is not needed
+ * for correctness but it does ensure that all the vcpus will notice
+ * that the rendezvous is requested immediately.
+ */
+ vm->rendezvous_func = func;
+ wmb();
+}
+
+#define RENDEZVOUS_CTR0(vm, vcpuid, fmt) \
+ do { \
+ if (vcpuid >= 0) \
+ VCPU_CTR0(vm, vcpuid, fmt); \
+ else \
+ VM_CTR0(vm, fmt); \
+ } while (0)
+
+static void
+vm_handle_rendezvous(struct vm *vm, int vcpuid)
+{
+
+ KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
+ ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));
+
+ mtx_lock(&vm->rendezvous_mtx);
+ while (vm->rendezvous_func != NULL) {
+ /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
+ CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);
+
+ if (vcpuid != -1 &&
+ CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
+ !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
+ VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
+ (*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
+ CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
+ }
+ if (CPU_CMP(&vm->rendezvous_req_cpus,
+ &vm->rendezvous_done_cpus) == 0) {
+ VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
+ vm_set_rendezvous_func(vm, NULL);
+ wakeup(&vm->rendezvous_func);
+ break;
+ }
+ RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
+ mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
+ "vmrndv", 0);
+ }
+ mtx_unlock(&vm->rendezvous_mtx);
+}
+
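The two functions above carry the core of the rendezvous protocol: an initiator publishes 'rendezvous_func' together with a set of target vcpus, and each target runs the callback exactly once, then sleeps until every requested vcpu has done the same. The following is a minimal user-space sketch of that bookkeeping, using pthreads in place of mtx_sleep()/wakeup(); struct rv_state, rv_handle() and the uint64_t cpu masks are hypothetical stand-ins, not vmm.c interfaces, and the completion test is simplified relative to the CPU_CMP() check above (lock/condvar initialization is omitted).

#include <pthread.h>
#include <stdint.h>

typedef void (rv_func_t)(int vcpuid, void *arg);

struct rv_state {
        pthread_mutex_t mtx;            /* models rendezvous_mtx */
        pthread_cond_t  cv;             /* models mtx_sleep()/wakeup() */
        uint64_t        active_cpus;    /* models the 'active_cpus' cpuset */
        uint64_t        req_cpus;       /* vcpus that must run the callback */
        uint64_t        done_cpus;      /* vcpus that already ran it */
        rv_func_t       *func;          /* NULL once the rendezvous completes */
        void            *arg;
};

static void
rv_handle(struct rv_state *rv, int vcpuid)
{
        pthread_mutex_lock(&rv->mtx);
        while (rv->func != NULL) {
                /* Only active vcpus are required to participate. */
                rv->req_cpus &= rv->active_cpus;

                if (vcpuid >= 0 &&
                    (rv->req_cpus & (1ULL << vcpuid)) != 0 &&
                    (rv->done_cpus & (1ULL << vcpuid)) == 0) {
                        (*rv->func)(vcpuid, rv->arg);
                        rv->done_cpus |= (1ULL << vcpuid);
                }
                if ((rv->req_cpus & ~rv->done_cpus) == 0) {
                        /* Every requested vcpu has run the callback. */
                        rv->func = NULL;
                        pthread_cond_broadcast(&rv->cv);
                        break;
                }
                /* Wait for the remaining vcpus, as mtx_sleep("vmrndv") does. */
                pthread_cond_wait(&rv->cv, &rv->mtx);
        }
        pthread_mutex_unlock(&rv->mtx);
}
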
/*
* Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
*/
@@ -874,9 +954,10 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
struct vm_exit *vmexit;
struct vcpu *vcpu;
- int t, timo;
+ int t, timo, spindown;
vcpu = &vm->vcpu[vcpuid];
+ spindown = 0;
vcpu_lock(vcpu);
@@ -888,7 +969,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
* returned from VMRUN() and before we grabbed the vcpu lock.
*/
if (!vm_nmi_pending(vm, vcpuid) &&
- (intr_disabled || vlapic_pending_intr(vcpu->vlapic) < 0)) {
+ (intr_disabled || !vlapic_pending_intr(vcpu->vlapic, NULL))) {
t = ticks;
vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
if (vlapic_enabled(vcpu->vlapic)) {
@@ -903,16 +984,25 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
* Spindown the vcpu if the apic is disabled and it
* had entered the halted state.
*/
- *retu = true;
- vmexit = vm_exitinfo(vm, vcpuid);
- vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
- VCPU_CTR0(vm, vcpuid, "spinning down cpu");
+ spindown = 1;
}
vcpu_require_state_locked(vcpu, VCPU_FROZEN);
vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
}
vcpu_unlock(vcpu);
+ /*
+ * Since 'vm_deactivate_cpu()' grabs a sleep mutex we must call it
+ * outside the confines of the vcpu spinlock.
+ */
+ if (spindown) {
+ *retu = true;
+ vmexit = vm_exitinfo(vm, vcpuid);
+ vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
+ vm_deactivate_cpu(vm, vcpuid);
+ VCPU_CTR0(vm, vcpuid, "spinning down cpu");
+ }
+
return (0);
}
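The hlt path above only records the spindown decision while the vcpu spin lock is held and performs the actual deactivation after vcpu_unlock(), because vm_deactivate_cpu() acquires the rendezvous sleep mutex. A rough illustration of that lock-ordering pattern with pthread primitives (the names are hypothetical, not vmm.c APIs, and lock initialization is assumed to happen elsewhere):

#include <pthread.h>
#include <stdbool.h>

static pthread_spinlock_t vcpu_state_lock;      /* models the vcpu spin lock */
static pthread_mutex_t deactivate_lock = PTHREAD_MUTEX_INITIALIZER;
static bool apic_disabled_and_halted;

static void
maybe_spin_down(void)
{
        bool spindown;

        pthread_spin_lock(&vcpu_state_lock);
        spindown = apic_disabled_and_halted;    /* decide, but do not sleep */
        pthread_spin_unlock(&vcpu_state_lock);

        if (spindown) {
                /* A blocking lock may only be taken after the spin lock. */
                pthread_mutex_lock(&deactivate_lock);
                /* ... remove the vcpu from the active set ... */
                pthread_mutex_unlock(&deactivate_lock);
        }
}
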
@@ -1042,7 +1132,7 @@ restart:
vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
vcpu->hostcpu = curcpu;
- error = VMRUN(vm->cookie, vcpuid, rip, pmap);
+ error = VMRUN(vm->cookie, vcpuid, rip, pmap, &vm->rendezvous_func);
vcpu->hostcpu = NOCPU;
vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
@@ -1056,6 +1146,14 @@ restart:
if (error == 0) {
retu = false;
switch (vme->exitcode) {
+ case VM_EXITCODE_IOAPIC_EOI:
+ vioapic_process_eoi(vm, vcpuid,
+ vme->u.ioapic_eoi.vector);
+ break;
+ case VM_EXITCODE_RENDEZVOUS:
+ vm_handle_rendezvous(vm, vcpuid);
+ error = 0;
+ break;
case VM_EXITCODE_HLT:
intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
@@ -1111,7 +1209,7 @@ vm_inject_nmi(struct vm *vm, int vcpuid)
vcpu = &vm->vcpu[vcpuid];
vcpu->nmi_pending = 1;
- vcpu_notify_event(vm, vcpuid);
+ vcpu_notify_event(vm, vcpuid, false);
return (0);
}
@@ -1286,8 +1384,37 @@ void
vm_activate_cpu(struct vm *vm, int vcpuid)
{
- if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
- CPU_SET(vcpuid, &vm->active_cpus);
+ KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU,
+ ("vm_activate_cpu: invalid vcpuid %d", vcpuid));
+ KASSERT(!CPU_ISSET(vcpuid, &vm->active_cpus),
+ ("vm_activate_cpu: vcpuid %d is already active", vcpuid));
+
+ VCPU_CTR0(vm, vcpuid, "activated");
+ CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
+}
+
+static void
+vm_deactivate_cpu(struct vm *vm, int vcpuid)
+{
+
+ KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU,
+ ("vm_deactivate_cpu: invalid vcpuid %d", vcpuid));
+ KASSERT(CPU_ISSET(vcpuid, &vm->active_cpus),
+ ("vm_deactivate_cpu: vcpuid %d is not active", vcpuid));
+
+ VCPU_CTR0(vm, vcpuid, "deactivated");
+ CPU_CLR_ATOMIC(vcpuid, &vm->active_cpus);
+
+ /*
+ * If a vcpu rendezvous is in progress then it could be blocked
+ * on 'vcpuid' - unblock it before disappearing forever.
+ */
+ mtx_lock(&vm->rendezvous_mtx);
+ if (vm->rendezvous_func != NULL) {
+ VCPU_CTR0(vm, vcpuid, "unblock rendezvous after deactivation");
+ wakeup(&vm->rendezvous_func);
+ }
+ mtx_unlock(&vm->rendezvous_mtx);
}
cpuset_t
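vm_deactivate_cpu() also wakes any rendezvous waiters, since a rendezvous could otherwise block forever waiting on a vcpu that is disappearing. Continuing the earlier user-space sketch (rv_deactivate is a hypothetical name; it folds the atomic clear and the wakeup under one lock for brevity):

static void
rv_deactivate(struct rv_state *rv, int vcpuid)
{
        pthread_mutex_lock(&rv->mtx);
        rv->active_cpus &= ~(1ULL << vcpuid);
        if (rv->func != NULL) {
                /*
                 * Wake the waiters so they recompute 'req_cpus' against the
                 * shrunken active set and can declare the rendezvous done.
                 */
                pthread_cond_broadcast(&rv->cv);
        }
        pthread_mutex_unlock(&rv->mtx);
}
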
@@ -1339,7 +1466,7 @@ vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
* to the host_cpu to cause the vcpu to trap into the hypervisor.
*/
void
-vcpu_notify_event(struct vm *vm, int vcpuid)
+vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
{
int hostcpu;
struct vcpu *vcpu;
@@ -1354,8 +1481,13 @@ vcpu_notify_event(struct vm *vm, int vcpuid)
} else {
if (vcpu->state != VCPU_RUNNING)
panic("invalid vcpu state %d", vcpu->state);
- if (hostcpu != curcpu)
- ipi_cpu(hostcpu, vmm_ipinum);
+ if (hostcpu != curcpu) {
+ if (lapic_intr)
+ vlapic_post_intr(vcpu->vlapic, hostcpu,
+ vmm_ipinum);
+ else
+ ipi_cpu(hostcpu, vmm_ipinum);
+ }
}
vcpu_unlock(vcpu);
}
@@ -1375,3 +1507,51 @@ vm_apicid2vcpuid(struct vm *vm, int apicid)
*/
return (apicid);
}
+
+void
+vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
+ vm_rendezvous_func_t func, void *arg)
+{
+ int i;
+
+ /*
+ * Enforce that this function is called without any locks
+ */
+ WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
+ KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
+ ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));
+
+restart:
+ mtx_lock(&vm->rendezvous_mtx);
+ if (vm->rendezvous_func != NULL) {
+ /*
+ * If a rendezvous is already in progress then we need to
+ * call the rendezvous handler in case this 'vcpuid' is one
+ * of the targets of the rendezvous.
+ */
+ RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
+ mtx_unlock(&vm->rendezvous_mtx);
+ vm_handle_rendezvous(vm, vcpuid);
+ goto restart;
+ }
+ KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
+ "rendezvous is still in progress"));
+
+ RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
+ vm->rendezvous_req_cpus = dest;
+ CPU_ZERO(&vm->rendezvous_done_cpus);
+ vm->rendezvous_arg = arg;
+ vm_set_rendezvous_func(vm, func);
+ mtx_unlock(&vm->rendezvous_mtx);
+
+ /*
+ * Wake up any sleeping vcpus and trigger a VM-exit in any running
+ * vcpus so they handle the rendezvous as soon as possible.
+ */
+ for (i = 0; i < VM_MAXCPU; i++) {
+ if (CPU_ISSET(i, &dest))
+ vcpu_notify_event(vm, i, false);
+ }
+
+ vm_handle_rendezvous(vm, vcpuid);
+}
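Completing the user-space sketch, an initiator along the lines of vm_smp_rendezvous(): help finish any rendezvous already in flight, publish the new request, kick the targets, and then participate itself. rv_initiate() and notify_vcpu() are hypothetical stand-ins for vm_smp_rendezvous() and vcpu_notify_event(), reusing struct rv_state and rv_handle() from the earlier sketch.

static void
notify_vcpu(int vcpuid)
{
        /* Stand-in for vcpu_notify_event(): e.g. signal the vcpu thread. */
        (void)vcpuid;
}

static void
rv_initiate(struct rv_state *rv, int vcpuid, uint64_t dest,
    rv_func_t *func, void *arg)
{
        int i;

restart:
        pthread_mutex_lock(&rv->mtx);
        if (rv->func != NULL) {
                /* Help the rendezvous in progress to finish, then retry. */
                pthread_mutex_unlock(&rv->mtx);
                rv_handle(rv, vcpuid);
                goto restart;
        }
        rv->req_cpus = dest;
        rv->done_cpus = 0;
        rv->arg = arg;
        rv->func = func;        /* the kernel follows this store with wmb() */
        pthread_mutex_unlock(&rv->mtx);

        /* Kick every target so it notices the request promptly. */
        for (i = 0; i < 64; i++) {
                if (dest & (1ULL << i))
                        notify_vcpu(i);
        }
        rv_handle(rv, vcpuid);
}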