From 888f6511e321556007cf471f691cb84d9a817237 Mon Sep 17 00:00:00 2001 From: jhb Date: Mon, 21 Jul 2014 02:39:17 +0000 Subject: MFC 263780,264516,265062,265101,265203,265364: Add an ioctl to suspend a virtual machine (VM_SUSPEND). Add logic in the HLT exit handler to detect if the guest has put all vcpus to sleep permanently by executing a HLT with interrupts disabled. When this condition is detected the guest will be suspended with a reason of VM_SUSPEND_HALT and the bhyve(8) process will exit. This logic can be disabled via the tunable 'hw.vmm.halt_detection'. --- lib/libvmmapi/vmmapi.c | 10 ++ lib/libvmmapi/vmmapi.h | 1 + sys/amd64/include/vmm.h | 26 ++++- sys/amd64/include/vmm_dev.h | 7 ++ sys/amd64/vmm/amd/amdv.c | 3 +- sys/amd64/vmm/intel/vmx.c | 13 ++- sys/amd64/vmm/vmm.c | 238 +++++++++++++++++++++++++++++++------------ sys/amd64/vmm/vmm_dev.c | 5 + usr.sbin/bhyve/bhyverun.c | 73 +++++++++---- usr.sbin/bhyvectl/bhyvectl.c | 13 ++- 10 files changed, 296 insertions(+), 93 deletions(-) diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 22b536a..4a7f852 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -342,6 +342,16 @@ vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit) return (error); } +int +vm_suspend(struct vmctx *ctx, enum vm_suspend_how how) +{ + struct vm_suspend vmsuspend; + + bzero(&vmsuspend, sizeof(vmsuspend)); + vmsuspend.how = how; + return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend)); +} + static int vm_inject_exception_real(struct vmctx *ctx, int vcpu, int vector, int error_code, int error_code_valid) diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index f8921d3..2a2ca6b 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -61,6 +61,7 @@ int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val); int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval); int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *ret_vmexit);
+int vm_suspend(struct vmctx *ctx, enum vm_suspend_how how); int vm_apicid2vcpu(struct vmctx *ctx, int apicid); int vm_inject_exception(struct vmctx *ctx, int vcpu, int vec); int vm_inject_exception2(struct vmctx *ctx, int vcpu, int vec, int errcode); diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index d50e7bc..68240b9 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -29,6 +29,14 @@ #ifndef _VMM_H_ #define _VMM_H_ +enum vm_suspend_how { + VM_SUSPEND_NONE, + VM_SUSPEND_RESET, + VM_SUSPEND_POWEROFF, + VM_SUSPEND_HALT, + VM_SUSPEND_LAST +}; + #ifdef _KERNEL #define VM_MAX_NAMELEN 32 @@ -53,7 +61,8 @@ typedef int (*vmm_cleanup_func_t)(void); typedef void (*vmm_resume_func_t)(void); typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap); typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip, - struct pmap *pmap, void *rendezvous_cookie); + struct pmap *pmap, void *rendezvous_cookie, + void *suspend_cookie); typedef void (*vmi_cleanup_func_t)(void *vmi); typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num, uint64_t *retval); @@ -114,6 +123,7 @@ int vm_get_seg_desc(struct vm *vm, int vcpu, int reg, int vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc); int vm_run(struct vm *vm, struct vm_run *vmrun); +int vm_suspend(struct vm *vm, enum vm_suspend_how how); int vm_inject_nmi(struct vm *vm, int vcpu); int vm_nmi_pending(struct vm *vm, int vcpuid); void vm_nmi_clear(struct vm *vm, int vcpuid); @@ -132,6 +142,7 @@ int vm_apicid2vcpuid(struct vm *vm, int apicid); void vm_activate_cpu(struct vm *vm, int vcpu); cpuset_t vm_active_cpus(struct vm *vm); struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid); +void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip); /* * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'. 
@@ -158,6 +169,13 @@ vcpu_rendezvous_pending(void *rendezvous_cookie) return (*(uintptr_t *)rendezvous_cookie != 0); } +static __inline int +vcpu_suspended(void *suspend_cookie) +{ + + return (*(int *)suspend_cookie); +} + /* * Return 1 if device indicated by bus/slot/func is supposed to be a * pci passthrough device. @@ -308,9 +326,10 @@ enum vm_exitcode { VM_EXITCODE_PAGING, VM_EXITCODE_INST_EMUL, VM_EXITCODE_SPINUP_AP, - VM_EXITCODE_SPINDOWN_CPU, + VM_EXITCODE_DEPRECATED1, /* used to be SPINDOWN_CPU */ VM_EXITCODE_RENDEZVOUS, VM_EXITCODE_IOAPIC_EOI, + VM_EXITCODE_SUSPENDED, VM_EXITCODE_MAX }; @@ -372,6 +391,9 @@ struct vm_exit { struct { int vector; } ioapic_eoi; + struct { + enum vm_suspend_how how; + } suspended; } u; }; diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index eda9b94..fcd437f 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -159,12 +159,17 @@ struct vm_hpet_cap { uint32_t capabilities; /* lower 32 bits of HPET capabilities */ }; +struct vm_suspend { + enum vm_suspend_how how; +}; + enum { /* general routines */ IOCNUM_ABIVERS = 0, IOCNUM_RUN = 1, IOCNUM_SET_CAPABILITY = 2, IOCNUM_GET_CAPABILITY = 3, + IOCNUM_SUSPEND = 4, /* memory apis */ IOCNUM_MAP_MEMORY = 10, @@ -212,6 +217,8 @@ enum { #define VM_RUN \ _IOWR('v', IOCNUM_RUN, struct vm_run) +#define VM_SUSPEND \ + _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) #define VM_MAP_MEMORY \ _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment) #define VM_GET_MEMORY_SEG \ diff --git a/sys/amd64/vmm/amd/amdv.c b/sys/amd64/vmm/amd/amdv.c index 39f0ef7..4c88d12 100644 --- a/sys/amd64/vmm/amd/amdv.c +++ b/sys/amd64/vmm/amd/amdv.c @@ -67,7 +67,8 @@ amdv_vminit(struct vm *vm, struct pmap *pmap) } static int -amdv_vmrun(void *arg, int vcpu, register_t rip, struct pmap *pmap, void *cookie) +amdv_vmrun(void *arg, int vcpu, register_t rip, struct pmap *pmap, + void *rptr, void *sptr) { printf("amdv_vmrun: not implemented\n"); diff --git 
a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index 2da0ab7..1c39552 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -2136,7 +2136,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) static int vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap, - void *rendezvous_cookie) + void *rendezvous_cookie, void *suspend_cookie) { int rc, handled, launched; struct vmx *vmx; @@ -2193,9 +2193,10 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap, * pmap_invalidate_ept(). */ disable_intr(); - if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) { + if (vcpu_suspended(suspend_cookie)) { enable_intr(); - handled = vmx_exit_astpending(vmx, vcpu, vmexit); + vm_exit_suspended(vmx->vm, vcpu, vmcs_guest_rip()); + handled = UNHANDLED; break; } @@ -2205,6 +2206,12 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap, break; } + if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) { + enable_intr(); + handled = vmx_exit_astpending(vmx, vcpu, vmexit); + break; + } + vmx_inject_interrupts(vmx, vcpu, vlapic); vmx_run_trace(vmx, vcpu); rc = vmx_enter_guest(vmxctx, vmx, launched); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 9d740d1..9aed5f5 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -139,6 +139,11 @@ struct vm { cpuset_t rendezvous_done_cpus; void *rendezvous_arg; vm_rendezvous_func_t rendezvous_func; + + int suspend; + volatile cpuset_t suspended_cpus; + + volatile cpuset_t halted_cpus; }; static int vmm_initialized; @@ -149,8 +154,8 @@ static struct vmm_ops *ops; #define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0) #define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL) -#define VMRUN(vmi, vcpu, rip, pmap, rptr) \ - (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr) : ENXIO) +#define VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \ + (ops != NULL ? 
(*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO) #define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) #define VMSPACE_ALLOC(min, max) \ (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL) @@ -184,12 +189,20 @@ static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); +/* + * Halt the guest if all vcpus are executing a HLT instruction with + * interrupts disabled. + */ +static int halt_detection_enabled = 1; +TUNABLE_INT("hw.vmm.halt_detection", &halt_detection_enabled); +SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN, + &halt_detection_enabled, 0, + "Halt VM if all vcpus execute HLT with interrupts disabled"); + static int vmm_ipinum; SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, "IPI vector used for vcpu notifications"); -static void vm_deactivate_cpu(struct vm *vm, int vcpuid); - static void vcpu_cleanup(struct vm *vm, int i) { @@ -1003,56 +1016,73 @@ vm_handle_rendezvous(struct vm *vm, int vcpuid) static int vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) { - struct vm_exit *vmexit; struct vcpu *vcpu; - int t, timo, spindown; + const char *wmesg; + int t, vcpu_halted, vm_halted; + + KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted")); vcpu = &vm->vcpu[vcpuid]; - spindown = 0; + vcpu_halted = 0; + vm_halted = 0; vcpu_lock(vcpu); + while (1) { + /* + * Do a final check for pending NMI or interrupts before + * really putting this thread to sleep. Also check for + * software events that would cause this vcpu to wakeup. + * + * These interrupts/events could have happened after the + * vcpu returned from VMRUN() and before it acquired the + * vcpu lock above. 
+ */ + if (vm->rendezvous_func != NULL || vm->suspend) + break; + if (vm_nmi_pending(vm, vcpuid)) + break; + if (!intr_disabled) { + if (vm_extint_pending(vm, vcpuid) || + vlapic_pending_intr(vcpu->vlapic, NULL)) { + break; + } + } - /* - * Do a final check for pending NMI or interrupts before - * really putting this thread to sleep. - * - * These interrupts could have happened any time after we - * returned from VMRUN() and before we grabbed the vcpu lock. - */ - if (!vm_nmi_pending(vm, vcpuid) && - (intr_disabled || !vlapic_pending_intr(vcpu->vlapic, NULL))) { - t = ticks; - vcpu_require_state_locked(vcpu, VCPU_SLEEPING); - if (vlapic_enabled(vcpu->vlapic)) { - /* - * XXX msleep_spin() is not interruptible so use the - * 'timo' to put an upper bound on the sleep time. - */ - timo = hz; - msleep_spin(vcpu, &vcpu->mtx, "vmidle", timo); + /* + * Some Linux guests implement "halt" by having all vcpus + * execute HLT with interrupts disabled. 'halted_cpus' keeps + * track of the vcpus that have entered this state. When all + * vcpus enter the halted state the virtual machine is halted. + */ + if (intr_disabled) { + wmesg = "vmhalt"; + VCPU_CTR0(vm, vcpuid, "Halted"); + if (!vcpu_halted && halt_detection_enabled) { + vcpu_halted = 1; + CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus); + } + if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) { + vm_halted = 1; + break; + } } else { - /* - * Spindown the vcpu if the apic is disabled and it - * had entered the halted state. - */ - spindown = 1; + wmesg = "vmidle"; } + + t = ticks; + vcpu_require_state_locked(vcpu, VCPU_SLEEPING); + msleep_spin(vcpu, &vcpu->mtx, wmesg, 0); vcpu_require_state_locked(vcpu, VCPU_FROZEN); vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); } + + if (vcpu_halted) + CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus); + vcpu_unlock(vcpu); - /* - * Since 'vm_deactivate_cpu()' grabs a sleep mutex we must call it - * outside the confines of the vcpu spinlock. 
- */ - if (spindown) { - *retu = true; - vmexit = vm_exitinfo(vm, vcpuid); - vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU; - vm_deactivate_cpu(vm, vcpuid); - VCPU_CTR0(vm, vcpuid, "spinning down cpu"); - } + if (vm_halted) + vm_suspend(vm, VM_SUSPEND_HALT); return (0); } @@ -1152,6 +1182,100 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) return (error); } +static int +vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) +{ + int i, done; + struct vcpu *vcpu; + + done = 0; + vcpu = &vm->vcpu[vcpuid]; + + CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus); + + /* + * Wait until all 'active_cpus' have suspended themselves. + * + * Since a VM may be suspended at any time including when one or + * more vcpus are doing a rendezvous we need to call the rendezvous + * handler while we are waiting to prevent a deadlock. + */ + vcpu_lock(vcpu); + while (1) { + if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { + VCPU_CTR0(vm, vcpuid, "All vcpus suspended"); + break; + } + + if (vm->rendezvous_func == NULL) { + VCPU_CTR0(vm, vcpuid, "Sleeping during suspend"); + vcpu_require_state_locked(vcpu, VCPU_SLEEPING); + msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); + vcpu_require_state_locked(vcpu, VCPU_FROZEN); + } else { + VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend"); + vcpu_unlock(vcpu); + vm_handle_rendezvous(vm, vcpuid); + vcpu_lock(vcpu); + } + } + vcpu_unlock(vcpu); + + /* + * Wakeup the other sleeping vcpus and return to userspace. 
+ */ + for (i = 0; i < VM_MAXCPU; i++) { + if (CPU_ISSET(i, &vm->suspended_cpus)) { + vcpu_notify_event(vm, i, false); + } + } + + *retu = true; + return (0); +} + +int +vm_suspend(struct vm *vm, enum vm_suspend_how how) +{ + int i; + + if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) + return (EINVAL); + + if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { + VM_CTR2(vm, "virtual machine already suspended %d/%d", + vm->suspend, how); + return (EALREADY); + } + + VM_CTR1(vm, "virtual machine successfully suspended %d", how); + + /* + * Notify all active vcpus that they are now suspended. + */ + for (i = 0; i < VM_MAXCPU; i++) { + if (CPU_ISSET(i, &vm->active_cpus)) + vcpu_notify_event(vm, i, false); + } + + return (0); +} + +void +vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip) +{ + struct vm_exit *vmexit; + + KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, + ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); + + vmexit = vm_exitinfo(vm, vcpuid); + vmexit->rip = rip; + vmexit->inst_length = 0; + vmexit->exitcode = VM_EXITCODE_SUSPENDED; + vmexit->u.suspended.how = vm->suspend; +} + int vm_run(struct vm *vm, struct vm_run *vmrun) { @@ -1162,12 +1286,15 @@ vm_run(struct vm *vm, struct vm_run *vmrun) struct vm_exit *vme; bool retu, intr_disabled; pmap_t pmap; + void *rptr, *sptr; vcpuid = vmrun->cpuid; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); + rptr = &vm->rendezvous_func; + sptr = &vm->suspend; pmap = vmspace_pmap(vm->vmspace); vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; @@ -1187,7 +1314,7 @@ restart: restore_guest_fpustate(vcpu); vcpu_require_state(vm, vcpuid, VCPU_RUNNING); - error = VMRUN(vm->cookie, vcpuid, rip, pmap, &vm->rendezvous_func); + error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr); vcpu_require_state(vm, vcpuid, VCPU_FROZEN); save_guest_fpustate(vcpu); @@ -1200,6 +1327,9 @@ restart: if (error == 0) { retu = false; switch (vme->exitcode) { + case VM_EXITCODE_SUSPENDED: + error 
= vm_handle_suspend(vm, vcpuid, &retu); + break; case VM_EXITCODE_IOAPIC_EOI: vioapic_process_eoi(vm, vcpuid, vme->u.ioapic_eoi.vector); @@ -1567,30 +1697,6 @@ vm_activate_cpu(struct vm *vm, int vcpuid) CPU_SET_ATOMIC(vcpuid, &vm->active_cpus); } -static void -vm_deactivate_cpu(struct vm *vm, int vcpuid) -{ - - KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, - ("vm_deactivate_cpu: invalid vcpuid %d", vcpuid)); - KASSERT(CPU_ISSET(vcpuid, &vm->active_cpus), - ("vm_deactivate_cpu: vcpuid %d is not active", vcpuid)); - - VCPU_CTR0(vm, vcpuid, "deactivated"); - CPU_CLR_ATOMIC(vcpuid, &vm->active_cpus); - - /* - * If a vcpu rendezvous is in progress then it could be blocked - * on 'vcpuid' - unblock it before disappearing forever. - */ - mtx_lock(&vm->rendezvous_mtx); - if (vm->rendezvous_func != NULL) { - VCPU_CTR0(vm, vcpuid, "unblock rendezvous after deactivation"); - wakeup(&vm->rendezvous_func); - } - mtx_unlock(&vm->rendezvous_mtx); -} - cpuset_t vm_active_cpus(struct vm *vm) { diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index 6defd13..3112c52 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -166,6 +166,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct vm_stat_desc *statdesc; struct vm_x2apic *x2apic; struct vm_gpa_pte *gpapte; + struct vm_suspend *vmsuspend; sc = vmmdev_lookup2(cdev); if (sc == NULL) @@ -240,6 +241,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, vmrun = (struct vm_run *)data; error = vm_run(sc->vm, vmrun); break; + case VM_SUSPEND: + vmsuspend = (struct vm_suspend *)data; + error = vm_suspend(sc->vm, vmsuspend->how); + break; case VM_STAT_DESC: { statdesc = (struct vm_stat_desc *)data; error = vmm_stat_desc_copy(statdesc->index, diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index 1bd6a20..e662ca3 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -113,6 +113,7 @@ struct bhyvestats { uint64_t cpu_switch_rotate; 
uint64_t cpu_switch_direct; int io_reset; + int io_poweroff; } stats; struct mt_vmm_info { @@ -271,13 +272,6 @@ fbsdrun_deletecpu(struct vmctx *ctx, int vcpu) } static int -vmexit_catch_reset(void) -{ - stats.io_reset++; - return (VMEXIT_RESET); -} - -static int vmexit_catch_inout(void) { return (VMEXIT_ABORT); @@ -327,8 +321,10 @@ vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) case INOUT_OK: return (VMEXIT_CONTINUE); case INOUT_RESET: + stats.io_reset++; return (VMEXIT_RESET); case INOUT_POWEROFF: + stats.io_poweroff++; return (VMEXIT_POWEROFF); default: fprintf(stderr, "Unhandled %s%c 0x%04x\n", @@ -399,17 +395,6 @@ vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) } static int -vmexit_spindown_cpu(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) -{ - int lastcpu; - - lastcpu = fbsdrun_deletecpu(ctx, *pvcpu); - if (!lastcpu) - pthread_exit(NULL); - return (vmexit_catch_reset()); -} - -static int vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { @@ -493,6 +478,45 @@ vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) return (VMEXIT_CONTINUE); } +static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; + +static int +vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + enum vm_suspend_how how; + + how = vmexit->u.suspended.how; + + fbsdrun_deletecpu(ctx, *pvcpu); + + if (*pvcpu != BSP) { + pthread_mutex_lock(&resetcpu_mtx); + pthread_cond_signal(&resetcpu_cond); + pthread_mutex_unlock(&resetcpu_mtx); + pthread_exit(NULL); + } + + pthread_mutex_lock(&resetcpu_mtx); + while (!CPU_EMPTY(&cpumask)) { + pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); + } + pthread_mutex_unlock(&resetcpu_mtx); + + switch (how) { + case VM_SUSPEND_RESET: + exit(0); + case VM_SUSPEND_POWEROFF: + exit(1); + case VM_SUSPEND_HALT: + exit(2); + default: + fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); + 
exit(100); + } + return (0); /* NOTREACHED */ +} + static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_INOUT] = vmexit_inout, [VM_EXITCODE_VMX] = vmexit_vmx, @@ -502,7 +526,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_MTRAP] = vmexit_mtrap, [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, - [VM_EXITCODE_SPINDOWN_CPU] = vmexit_spindown_cpu, + [VM_EXITCODE_SUSPENDED] = vmexit_suspend }; static void @@ -510,6 +534,7 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) { int error, rc, prevcpu; enum vm_exitcode exitcode; + enum vm_suspend_how how; if (vcpumap[vcpu] != NULL) { error = pthread_setaffinity_np(pthread_self(), @@ -541,7 +566,15 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) rip = vmexit[vcpu].rip; break; case VMEXIT_RESET: - exit(0); + case VMEXIT_POWEROFF: + if (rc == VMEXIT_RESET) + how = VM_SUSPEND_RESET; + else + how = VM_SUSPEND_POWEROFF; + error = vm_suspend(ctx, how); + assert(error == 0 || errno == EALREADY); + rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length; + break; default: exit(1); } diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c index 50a71bb..57d6e38 100644 --- a/usr.sbin/bhyvectl/bhyvectl.c +++ b/usr.sbin/bhyvectl/bhyvectl.c @@ -191,13 +191,16 @@ usage(void) " [--get-highmem]\n" " [--get-gpa-pmap]\n" " [--assert-lapic-lvt=]\n" - " [--inject-nmi]\n", + " [--inject-nmi]\n" + " [--force-reset]\n" + " [--force-poweroff]\n", progname); exit(1); } static int get_stats, getcap, setcap, capval, get_gpa_pmap; static int inject_nmi, assert_lapic_lvt; +static int force_reset, force_poweroff; static const char *capname; static int create, destroy, get_lowmem, get_highmem; static uint64_t memsize; @@ -565,6 +568,8 @@ main(int argc, char *argv[]) { "create", NO_ARG, &create, 1 }, { "destroy", NO_ARG, &destroy, 1 }, { "inject-nmi", NO_ARG, &inject_nmi, 1 }, + { "force-reset", NO_ARG, &force_reset, 1 }, + { "force-poweroff", NO_ARG, 
&force_poweroff, 1 }, { NULL, 0, NULL, 0 } }; @@ -1534,6 +1539,12 @@ main(int argc, char *argv[]) printf("vm_run error %d\n", error); } + if (!error && force_reset) + error = vm_suspend(ctx, VM_SUSPEND_RESET); + + if (!error && force_poweroff) + error = vm_suspend(ctx, VM_SUSPEND_POWEROFF); + if (error) printf("errno = %d\n", errno); -- cgit v1.1