Diffstat (limited to 'sys/amd64/vmm/vmm.c')
-rw-r--r--	sys/amd64/vmm/vmm.c | 234
1 file changed, 207 insertions(+), 27 deletions(-)
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index f471218b..2c86068 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -124,19 +124,25 @@ struct vm {
 	 * An active vcpu is one that has been started implicitly (BSP) or
 	 * explicitly (AP) by sending it a startup ipi.
 	 */
-	cpuset_t	active_cpus;
+	volatile cpuset_t active_cpus;
+
+	struct mtx	rendezvous_mtx;
+	cpuset_t	rendezvous_req_cpus;
+	cpuset_t	rendezvous_done_cpus;
+	void		*rendezvous_arg;
+	vm_rendezvous_func_t rendezvous_func;
 };
 
 static int vmm_initialized;
 
 static struct vmm_ops *ops;
-#define	VMM_INIT()	(ops != NULL ? (*ops->init)() : 0)
+#define	VMM_INIT(num)	(ops != NULL ? (*ops->init)(num) : 0)
 #define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
 #define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)
 
 #define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
-#define	VMRUN(vmi, vcpu, rip, pmap) \
-	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap) : ENXIO)
+#define	VMRUN(vmi, vcpu, rip, pmap, rptr) \
+	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr) : ENXIO)
 #define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
 #define	VMSPACE_ALLOC(min, max) \
 	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
@@ -156,6 +162,10 @@ static struct vmm_ops *ops;
 	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
 #define	VMSETCAP(vmi, vcpu, num, val)		\
 	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
+#define	VLAPIC_INIT(vmi, vcpu)			\
+	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
+#define	VLAPIC_CLEANUP(vmi, vlapic)		\
+	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
 
 #define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
 #define	fpu_stop_emulating()	clts()
@@ -166,10 +176,20 @@ CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */
 /* statistics */
 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
 
+SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
+
+static int vmm_ipinum;
+SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
+    "IPI vector used for vcpu notifications");
+
+static void vm_deactivate_cpu(struct vm *vm, int vcpuid);
+
 static void
-vcpu_cleanup(struct vcpu *vcpu)
+vcpu_cleanup(struct vm *vm, int i)
 {
-	vlapic_cleanup(vcpu->vlapic);
+	struct vcpu *vcpu = &vm->vcpu[i];
+
+	VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
 	vmm_stat_free(vcpu->stats);
 	fpu_save_area_free(vcpu->guestfpu);
 }
@@ -184,7 +204,7 @@ vcpu_init(struct vm *vm, uint32_t vcpu_id)
 	vcpu_lock_init(vcpu);
 	vcpu->hostcpu = NOCPU;
 	vcpu->vcpuid = vcpu_id;
-	vcpu->vlapic = vlapic_init(vm, vcpu_id);
+	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
 	vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
 	vcpu->guestfpu = fpu_save_area_alloc();
 	fpu_save_area_reset(vcpu->guestfpu);
@@ -216,7 +236,10 @@ vmm_init(void)
 	int error;
 
 	vmm_host_state_init();
-	vmm_ipi_init();
+
+	vmm_ipinum = vmm_ipi_alloc();
+	if (vmm_ipinum == 0)
+		vmm_ipinum = IPI_AST;
 
 	error = vmm_mem_init();
 	if (error)
@@ -232,7 +255,7 @@ vmm_init(void)
 	vmm_msr_init();
 	vmm_resume_p = vmm_resume;
 
-	return (VMM_INIT());
+	return (VMM_INIT(vmm_ipinum));
 }
 
 static int
@@ -253,7 +276,8 @@ vmm_handler(module_t mod, int what, void *arg)
 		if (error == 0) {
 			vmm_resume_p = NULL;
 			iommu_cleanup();
-			vmm_ipi_cleanup();
+			if (vmm_ipinum != IPI_AST)
+				vmm_ipi_free(vmm_ipinum);
 			error = VMM_CLEANUP();
 			/*
 			 * Something bad happened - prevent new
@@ -288,8 +312,6 @@ static moduledata_t vmm_kmod = {
 DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
 MODULE_VERSION(vmm, 1);
 
-SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
-
 int
 vm_create(const char *name, struct vm **retvm)
 {
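The other half of the VMRUN() contract lives in the hardware backends (vmx.c in this commit), which are outside this file. The point of the new 'rptr' argument is that the backend can poll the rendezvous function pointer with interrupts disabled immediately before resuming the guest, so a rendezvous request posted by another vcpu is never missed. A minimal sketch of that idea follows; the function name and control flow are illustrative assumptions, not the actual backend code:

static int
example_vmrun(void *vmi, int vcpu, register_t rip, pmap_t pmap, void *rptr)
{
	vm_rendezvous_func_t *rendezvous;

	rendezvous = rptr;	/* points at vm->rendezvous_func */
	for (;;) {
		/*
		 * With interrupts disabled the check below cannot race
		 * with the IPI sent by vcpu_notify_event(): an IPI that
		 * arrives after the check will instead kick the vcpu
		 * back out of guest mode.
		 */
		disable_intr();
		if (*rendezvous != NULL) {
			enable_intr();
			/* Report VM_EXITCODE_RENDEZVOUS to vm_run(). */
			return (0);
		}
		/* ... the world switch into the guest would go here ... */
		enable_intr();
	}
}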
@@ -315,6 +337,8 @@ vm_create(const char *name, struct vm **retvm)
 
 	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
 	strcpy(vm->name, name);
+	vm->vmspace = vmspace;
+	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
 	vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
 	vm->vioapic = vioapic_init(vm);
 	vm->vhpet = vhpet_init(vm);
@@ -325,7 +349,6 @@ vm_create(const char *name, struct vm **retvm)
 	}
 
 	vm_activate_cpu(vm, BSP);
-	vm->vmspace = vmspace;
 
 	*retvm = vm;
 	return (0);
@@ -360,7 +383,7 @@ vm_destroy(struct vm *vm)
 	vm->num_mem_segs = 0;
 
 	for (i = 0; i < VM_MAXCPU; i++)
-		vcpu_cleanup(&vm->vcpu[i]);
+		vcpu_cleanup(vm, i);
 
 	VMSPACE_FREE(vm->vmspace);
 
@@ -866,6 +889,63 @@ vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
 		panic("Error %d setting state to %d", error, newstate);
 }
 
+static void
+vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
+{
+
+	KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));
+
+	/*
+	 * Update 'rendezvous_func' and execute a write memory barrier to
+	 * ensure that it is visible across all host cpus. This is not needed
+	 * for correctness but it does ensure that all the vcpus will notice
+	 * that the rendezvous is requested immediately.
+	 */
+	vm->rendezvous_func = func;
+	wmb();
+}
+
+#define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
+	do {								\
+		if (vcpuid >= 0)					\
+			VCPU_CTR0(vm, vcpuid, fmt);			\
+		else							\
+			VM_CTR0(vm, fmt);				\
+	} while (0)
+
+static void
+vm_handle_rendezvous(struct vm *vm, int vcpuid)
+{
+
+	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
+	    ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));
+
+	mtx_lock(&vm->rendezvous_mtx);
+	while (vm->rendezvous_func != NULL) {
+		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
+		CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);
+
+		if (vcpuid != -1 &&
+		    CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
+		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
+			VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
+			(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
+			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
+		}
+		if (CPU_CMP(&vm->rendezvous_req_cpus,
+		    &vm->rendezvous_done_cpus) == 0) {
+			VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
+			vm_set_rendezvous_func(vm, NULL);
+			wakeup(&vm->rendezvous_func);
+			break;
+		}
+		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
+		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
+		    "vmrndv", 0);
+	}
+	mtx_unlock(&vm->rendezvous_mtx);
+}
+
 /*
  * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
  */
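vm_handle_rendezvous() above drives the callback: every vcpu named in 'rendezvous_req_cpus' runs 'rendezvous_func' exactly once (tracked through 'rendezvous_done_cpus'), and the last vcpu to finish clears the function pointer and wakes the sleepers. Any callback just has to match vm_rendezvous_func_t; the example below is hypothetical, not code from this commit:

static void
example_rendezvous_cb(struct vm *vm, int vcpuid, void *arg)
{
	u_int *counter = arg;	/* delivered through vm->rendezvous_arg */

	/*
	 * Invoked once on each target vcpu, either from that vcpu's own
	 * vm_run() loop (via VM_EXITCODE_RENDEZVOUS) or from the
	 * initiating thread inside vm_smp_rendezvous().
	 */
	atomic_add_int(counter, 1);
}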
@@ -903,16 +984,25 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
 			 * Spindown the vcpu if the apic is disabled and it
 			 * had entered the halted state.
 			 */
-			*retu = true;
-			vmexit = vm_exitinfo(vm, vcpuid);
-			vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
-			VCPU_CTR0(vm, vcpuid, "spinning down cpu");
+			spindown = 1;
 		}
 		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
 		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
 	}
 	vcpu_unlock(vcpu);
 
+	/*
+	 * Since 'vm_deactivate_cpu()' grabs a sleep mutex we must call it
+	 * outside the confines of the vcpu spinlock.
+	 */
+	if (spindown) {
+		*retu = true;
+		vmexit = vm_exitinfo(vm, vcpuid);
+		vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
+		vm_deactivate_cpu(vm, vcpuid);
+		VCPU_CTR0(vm, vcpuid, "spinning down cpu");
+	}
+
 	return (0);
 }
 
@@ -1042,7 +1132,7 @@ restart:
 
 	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
 	vcpu->hostcpu = curcpu;
-	error = VMRUN(vm->cookie, vcpuid, rip, pmap);
+	error = VMRUN(vm->cookie, vcpuid, rip, pmap, &vm->rendezvous_func);
 	vcpu->hostcpu = NOCPU;
 	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
 
@@ -1056,6 +1146,14 @@ restart:
 	if (error == 0) {
 		retu = false;
 		switch (vme->exitcode) {
+		case VM_EXITCODE_IOAPIC_EOI:
+			vioapic_process_eoi(vm, vcpuid,
+			    vme->u.ioapic_eoi.vector);
+			break;
+		case VM_EXITCODE_RENDEZVOUS:
+			vm_handle_rendezvous(vm, vcpuid);
+			error = 0;
+			break;
 		case VM_EXITCODE_HLT:
 			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
 			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
@@ -1111,7 +1209,7 @@ vm_inject_nmi(struct vm *vm, int vcpuid)
 	vcpu = &vm->vcpu[vcpuid];
 
 	vcpu->nmi_pending = 1;
-	vcpu_notify_event(vm, vcpuid);
+	vcpu_notify_event(vm, vcpuid, false);
 	return (0);
 }
 
@@ -1286,8 +1384,37 @@ void
 vm_activate_cpu(struct vm *vm, int vcpuid)
 {
 
-	if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
-		CPU_SET(vcpuid, &vm->active_cpus);
+	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU,
+	    ("vm_activate_cpu: invalid vcpuid %d", vcpuid));
+	KASSERT(!CPU_ISSET(vcpuid, &vm->active_cpus),
+	    ("vm_activate_cpu: vcpuid %d is already active", vcpuid));
+
+	VCPU_CTR0(vm, vcpuid, "activated");
+	CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
+}
+
+static void
+vm_deactivate_cpu(struct vm *vm, int vcpuid)
+{
+
+	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU,
+	    ("vm_deactivate_cpu: invalid vcpuid %d", vcpuid));
+	KASSERT(CPU_ISSET(vcpuid, &vm->active_cpus),
+	    ("vm_deactivate_cpu: vcpuid %d is not active", vcpuid));
+
+	VCPU_CTR0(vm, vcpuid, "deactivated");
+	CPU_CLR_ATOMIC(vcpuid, &vm->active_cpus);
+
+	/*
+	 * If a vcpu rendezvous is in progress then it could be blocked
+	 * on 'vcpuid' - unblock it before disappearing forever.
+	 */
+	mtx_lock(&vm->rendezvous_mtx);
+	if (vm->rendezvous_func != NULL) {
+		VCPU_CTR0(vm, vcpuid, "unblock rendezvous after deactivation");
+		wakeup(&vm->rendezvous_func);
+	}
+	mtx_unlock(&vm->rendezvous_mtx);
 }
 
 cpuset_t
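The spindown rework in the vm_handle_hlt() hunk above is a lock-ordering fix as much as a refactor: the vcpu lock is a spin mutex, while vm_deactivate_cpu() takes 'rendezvous_mtx', a sleepable MTX_DEF mutex, and a sleep mutex may not be acquired while a spin mutex is held. The decision is therefore only recorded under the spinlock and acted on after it is dropped. A condensed restatement of the pattern, with the halt condition elided and the wrapper function invented for illustration:

static void
example_spindown(struct vm *vm, int vcpuid, struct vcpu *vcpu)
{
	int spindown = 0;

	vcpu_lock(vcpu);		/* spin mutex: no sleeping allowed */
	if (!vlapic_enabled(vcpu->vlapic))
		spindown = 1;		/* only record the decision */
	vcpu_unlock(vcpu);

	if (spindown)
		vm_deactivate_cpu(vm, vcpuid);	/* sleep mutex is safe now */
}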
@@ -1339,7 +1466,7 @@ vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
  * to the host_cpu to cause the vcpu to trap into the hypervisor.
  */
 void
-vcpu_notify_event(struct vm *vm, int vcpuid)
+vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
 {
 	int hostcpu;
 	struct vcpu *vcpu;
@@ -1354,8 +1481,13 @@ vcpu_notify_event(struct vm *vm, int vcpuid)
 	} else {
 		if (vcpu->state != VCPU_RUNNING)
 			panic("invalid vcpu state %d", vcpu->state);
-		if (hostcpu != curcpu)
-			ipi_cpu(hostcpu, vmm_ipinum);
+		if (hostcpu != curcpu) {
+			if (lapic_intr)
+				vlapic_post_intr(vcpu->vlapic, hostcpu,
+				    vmm_ipinum);
+			else
+				ipi_cpu(hostcpu, vmm_ipinum);
+		}
 	}
 	vcpu_unlock(vcpu);
 }
@@ -1375,3 +1507,51 @@ vm_apicid2vcpuid(struct vm *vm, int apicid)
 	 */
 	return (apicid);
 }
+
+void
+vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
+    vm_rendezvous_func_t func, void *arg)
+{
+	int i;
+
+	/*
+	 * Enforce that this function is called without any locks
+	 */
+	WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
+	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
+	    ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));
+
+restart:
+	mtx_lock(&vm->rendezvous_mtx);
+	if (vm->rendezvous_func != NULL) {
+		/*
+		 * If a rendezvous is already in progress then we need to
+		 * call the rendezvous handler in case this 'vcpuid' is one
+		 * of the targets of the rendezvous.
+		 */
+		RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
+		mtx_unlock(&vm->rendezvous_mtx);
+		vm_handle_rendezvous(vm, vcpuid);
+		goto restart;
+	}
+	KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
+	    "rendezvous is still in progress"));
+
+	RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
+	vm->rendezvous_req_cpus = dest;
+	CPU_ZERO(&vm->rendezvous_done_cpus);
+	vm->rendezvous_arg = arg;
+	vm_set_rendezvous_func(vm, func);
+	mtx_unlock(&vm->rendezvous_mtx);
+
+	/*
+	 * Wake up any sleeping vcpus and trigger a VM-exit in any running
+	 * vcpus so they handle the rendezvous as soon as possible.
+	 */
+	for (i = 0; i < VM_MAXCPU; i++) {
+		if (CPU_ISSET(i, &dest))
+			vcpu_notify_event(vm, i, false);
+	}
+
+	vm_handle_rendezvous(vm, vcpuid);
+}
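Taken together, vm_smp_rendezvous() gives vmm.c a per-guest analogue of the kernel's smp_rendezvous(). A hypothetical caller, sketched below (both the callback and the call site are illustrations, not part of this commit), would target every active vcpu and block until the callback has run on each of them:

static void
example_cb(struct vm *vm, int vcpuid, void *arg)
{
	/* Per-vcpu work, e.g. invalidating per-vcpu cached state. */
}

static void
example_rendezvous_all(struct vm *vm, int self_vcpuid)
{
	cpuset_t dest;

	dest = vm_active_cpus(vm);	/* target every active vcpu */
	vm_smp_rendezvous(vm, self_vcpuid, dest, example_cb, NULL);
	/*
	 * Returns only after example_cb has run on every vcpu in 'dest'.
	 * Pass self_vcpuid == -1 when initiating from a non-vcpu thread.
	 */
}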