author    jhb <jhb@FreeBSD.org>    2014-07-21 02:39:17 +0000
committer jhb <jhb@FreeBSD.org>    2014-07-21 02:39:17 +0000
commit    888f6511e321556007cf471f691cb84d9a817237 (patch)
tree      84ace0524c020288c47a07096fb6abcb1d3387d4
parent    d034cf40e56b09120dc35d432b7fdc536c54f6ec (diff)
download  FreeBSD-src-888f6511e321556007cf471f691cb84d9a817237.zip
          FreeBSD-src-888f6511e321556007cf471f691cb84d9a817237.tar.gz
MFC 263780,264516,265062,265101,265203,265364:
Add an ioctl to suspend a virtual machine (VM_SUSPEND). Add logic in the HLT exit handler to detect if the guest has put all vcpus to sleep permanently by executing a HLT with interrupts disabled. When this condition is detected the guest will be suspended with a reason of VM_SUSPEND_HALT and the bhyve(8) process will exit. This logic can be disabled via the tunable 'hw.vmm.halt_detection'.
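
For context, a minimal sketch of driving the new ioctl through the vm_suspend() wrapper this commit adds to libvmmapi. The VM name "testvm" and the include list are assumptions modeled on contemporary vmmapi consumers such as bhyvectl(8), not part of the commit itself:

    #include <sys/types.h>
    #include <machine/vmm.h>	/* enum vm_suspend_how (added below) */
    #include <err.h>
    #include <vmmapi.h>

    int
    main(void)
    {
    	struct vmctx *ctx;

    	if ((ctx = vm_open("testvm")) == NULL)
    		err(1, "vm_open");

    	/*
    	 * Ask vmm.ko to suspend the VM; a second suspend request
    	 * fails with EALREADY (see vm_suspend() in vmm.c below).
    	 */
    	if (vm_suspend(ctx, VM_SUSPEND_POWEROFF) != 0)
    		err(1, "vm_suspend");

    	return (0);
    }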
-rw-r--r--  lib/libvmmapi/vmmapi.c          10
-rw-r--r--  lib/libvmmapi/vmmapi.h           1
-rw-r--r--  sys/amd64/include/vmm.h         26
-rw-r--r--  sys/amd64/include/vmm_dev.h      7
-rw-r--r--  sys/amd64/vmm/amd/amdv.c         3
-rw-r--r--  sys/amd64/vmm/intel/vmx.c       13
-rw-r--r--  sys/amd64/vmm/vmm.c            238
-rw-r--r--  sys/amd64/vmm/vmm_dev.c          5
-rw-r--r--  usr.sbin/bhyve/bhyverun.c       73
-rw-r--r--  usr.sbin/bhyvectl/bhyvectl.c    13
10 files changed, 296 insertions, 93 deletions
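
The halt detection described above is guarded by a read-only boot-time tunable (CTLFLAG_RDTUN in the vmm.c hunk below), so, assuming a standard loader setup, it would be disabled from /boot/loader.conf rather than at runtime:

    # /boot/loader.conf (assumed standard setup)
    hw.vmm.halt_detection="0"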
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
index 22b536a..4a7f852 100644
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -342,6 +342,16 @@ vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit)
return (error);
}
+int
+vm_suspend(struct vmctx *ctx, enum vm_suspend_how how)
+{
+ struct vm_suspend vmsuspend;
+
+ bzero(&vmsuspend, sizeof(vmsuspend));
+ vmsuspend.how = how;
+ return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend));
+}
+
static int
vm_inject_exception_real(struct vmctx *ctx, int vcpu, int vector,
int error_code, int error_code_valid)
diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
index f8921d3..2a2ca6b 100644
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -61,6 +61,7 @@ int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip,
struct vm_exit *ret_vmexit);
+int vm_suspend(struct vmctx *ctx, enum vm_suspend_how how);
int vm_apicid2vcpu(struct vmctx *ctx, int apicid);
int vm_inject_exception(struct vmctx *ctx, int vcpu, int vec);
int vm_inject_exception2(struct vmctx *ctx, int vcpu, int vec, int errcode);
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index d50e7bc..68240b9 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -29,6 +29,14 @@
#ifndef _VMM_H_
#define _VMM_H_
+enum vm_suspend_how {
+ VM_SUSPEND_NONE,
+ VM_SUSPEND_RESET,
+ VM_SUSPEND_POWEROFF,
+ VM_SUSPEND_HALT,
+ VM_SUSPEND_LAST
+};
+
#ifdef _KERNEL
#define VM_MAX_NAMELEN 32
@@ -53,7 +61,8 @@ typedef int (*vmm_cleanup_func_t)(void);
typedef void (*vmm_resume_func_t)(void);
typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
- struct pmap *pmap, void *rendezvous_cookie);
+ struct pmap *pmap, void *rendezvous_cookie,
+ void *suspend_cookie);
typedef void (*vmi_cleanup_func_t)(void *vmi);
typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num,
uint64_t *retval);
@@ -114,6 +123,7 @@ int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
struct seg_desc *desc);
int vm_run(struct vm *vm, struct vm_run *vmrun);
+int vm_suspend(struct vm *vm, enum vm_suspend_how how);
int vm_inject_nmi(struct vm *vm, int vcpu);
int vm_nmi_pending(struct vm *vm, int vcpuid);
void vm_nmi_clear(struct vm *vm, int vcpuid);
@@ -132,6 +142,7 @@ int vm_apicid2vcpuid(struct vm *vm, int apicid);
void vm_activate_cpu(struct vm *vm, int vcpu);
cpuset_t vm_active_cpus(struct vm *vm);
struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
+void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
/*
* Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
@@ -158,6 +169,13 @@ vcpu_rendezvous_pending(void *rendezvous_cookie)
return (*(uintptr_t *)rendezvous_cookie != 0);
}
+static __inline int
+vcpu_suspended(void *suspend_cookie)
+{
+
+ return (*(int *)suspend_cookie);
+}
+
/*
* Return 1 if device indicated by bus/slot/func is supposed to be a
* pci passthrough device.
@@ -308,9 +326,10 @@ enum vm_exitcode {
VM_EXITCODE_PAGING,
VM_EXITCODE_INST_EMUL,
VM_EXITCODE_SPINUP_AP,
- VM_EXITCODE_SPINDOWN_CPU,
+ VM_EXITCODE_DEPRECATED1, /* used to be SPINDOWN_CPU */
VM_EXITCODE_RENDEZVOUS,
VM_EXITCODE_IOAPIC_EOI,
+ VM_EXITCODE_SUSPENDED,
VM_EXITCODE_MAX
};
@@ -372,6 +391,9 @@ struct vm_exit {
struct {
int vector;
} ioapic_eoi;
+ struct {
+ enum vm_suspend_how how;
+ } suspended;
} u;
};
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
index eda9b94..fcd437f 100644
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -159,12 +159,17 @@ struct vm_hpet_cap {
uint32_t capabilities; /* lower 32 bits of HPET capabilities */
};
+struct vm_suspend {
+ enum vm_suspend_how how;
+};
+
enum {
/* general routines */
IOCNUM_ABIVERS = 0,
IOCNUM_RUN = 1,
IOCNUM_SET_CAPABILITY = 2,
IOCNUM_GET_CAPABILITY = 3,
+ IOCNUM_SUSPEND = 4,
/* memory apis */
IOCNUM_MAP_MEMORY = 10,
@@ -212,6 +217,8 @@ enum {
#define VM_RUN \
_IOWR('v', IOCNUM_RUN, struct vm_run)
+#define VM_SUSPEND \
+ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
#define VM_MAP_MEMORY \
_IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
#define VM_GET_MEMORY_SEG \
diff --git a/sys/amd64/vmm/amd/amdv.c b/sys/amd64/vmm/amd/amdv.c
index 39f0ef7..4c88d12 100644
--- a/sys/amd64/vmm/amd/amdv.c
+++ b/sys/amd64/vmm/amd/amdv.c
@@ -67,7 +67,8 @@ amdv_vminit(struct vm *vm, struct pmap *pmap)
}
static int
-amdv_vmrun(void *arg, int vcpu, register_t rip, struct pmap *pmap, void *cookie)
+amdv_vmrun(void *arg, int vcpu, register_t rip, struct pmap *pmap,
+ void *rptr, void *sptr)
{
printf("amdv_vmrun: not implemented\n");
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 2da0ab7..1c39552 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -2136,7 +2136,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
static int
vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
- void *rendezvous_cookie)
+ void *rendezvous_cookie, void *suspend_cookie)
{
int rc, handled, launched;
struct vmx *vmx;
@@ -2193,9 +2193,10 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
* pmap_invalidate_ept().
*/
disable_intr();
- if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) {
+ if (vcpu_suspended(suspend_cookie)) {
enable_intr();
- handled = vmx_exit_astpending(vmx, vcpu, vmexit);
+ vm_exit_suspended(vmx->vm, vcpu, vmcs_guest_rip());
+ handled = UNHANDLED;
break;
}
@@ -2205,6 +2206,12 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
break;
}
+ if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) {
+ enable_intr();
+ handled = vmx_exit_astpending(vmx, vcpu, vmexit);
+ break;
+ }
+
vmx_inject_interrupts(vmx, vcpu, vlapic);
vmx_run_trace(vmx, vcpu);
rc = vmx_enter_guest(vmxctx, vmx, launched);
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 9d740d1..9aed5f5 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -139,6 +139,11 @@ struct vm {
cpuset_t rendezvous_done_cpus;
void *rendezvous_arg;
vm_rendezvous_func_t rendezvous_func;
+
+ int suspend;
+ volatile cpuset_t suspended_cpus;
+
+ volatile cpuset_t halted_cpus;
};
static int vmm_initialized;
@@ -149,8 +154,8 @@ static struct vmm_ops *ops;
#define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0)
#define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
-#define VMRUN(vmi, vcpu, rip, pmap, rptr) \
- (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr) : ENXIO)
+#define VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
+ (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define VMSPACE_ALLOC(min, max) \
(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
@@ -184,12 +189,20 @@ static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
+/*
+ * Halt the guest if all vcpus are executing a HLT instruction with
+ * interrupts disabled.
+ */
+static int halt_detection_enabled = 1;
+TUNABLE_INT("hw.vmm.halt_detection", &halt_detection_enabled);
+SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
+ &halt_detection_enabled, 0,
+ "Halt VM if all vcpus execute HLT with interrupts disabled");
+
static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
"IPI vector used for vcpu notifications");
-static void vm_deactivate_cpu(struct vm *vm, int vcpuid);
-
static void
vcpu_cleanup(struct vm *vm, int i)
{
@@ -1003,56 +1016,73 @@ vm_handle_rendezvous(struct vm *vm, int vcpuid)
static int
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
- struct vm_exit *vmexit;
struct vcpu *vcpu;
- int t, timo, spindown;
+ const char *wmesg;
+ int t, vcpu_halted, vm_halted;
+
+ KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
vcpu = &vm->vcpu[vcpuid];
- spindown = 0;
+ vcpu_halted = 0;
+ vm_halted = 0;
vcpu_lock(vcpu);
+ while (1) {
+ /*
+ * Do a final check for pending NMI or interrupts before
+ * really putting this thread to sleep. Also check for
+ * software events that would cause this vcpu to wakeup.
+ *
+ * These interrupts/events could have happened after the
+ * vcpu returned from VMRUN() and before it acquired the
+ * vcpu lock above.
+ */
+ if (vm->rendezvous_func != NULL || vm->suspend)
+ break;
+ if (vm_nmi_pending(vm, vcpuid))
+ break;
+ if (!intr_disabled) {
+ if (vm_extint_pending(vm, vcpuid) ||
+ vlapic_pending_intr(vcpu->vlapic, NULL)) {
+ break;
+ }
+ }
- /*
- * Do a final check for pending NMI or interrupts before
- * really putting this thread to sleep.
- *
- * These interrupts could have happened any time after we
- * returned from VMRUN() and before we grabbed the vcpu lock.
- */
- if (!vm_nmi_pending(vm, vcpuid) &&
- (intr_disabled || !vlapic_pending_intr(vcpu->vlapic, NULL))) {
- t = ticks;
- vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
- if (vlapic_enabled(vcpu->vlapic)) {
- /*
- * XXX msleep_spin() is not interruptible so use the
- * 'timo' to put an upper bound on the sleep time.
- */
- timo = hz;
- msleep_spin(vcpu, &vcpu->mtx, "vmidle", timo);
+ /*
+ * Some Linux guests implement "halt" by having all vcpus
+ * execute HLT with interrupts disabled. 'halted_cpus' keeps
+ * track of the vcpus that have entered this state. When all
+ * vcpus enter the halted state the virtual machine is halted.
+ */
+ if (intr_disabled) {
+ wmesg = "vmhalt";
+ VCPU_CTR0(vm, vcpuid, "Halted");
+ if (!vcpu_halted && halt_detection_enabled) {
+ vcpu_halted = 1;
+ CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
+ }
+ if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
+ vm_halted = 1;
+ break;
+ }
} else {
- /*
- * Spindown the vcpu if the apic is disabled and it
- * had entered the halted state.
- */
- spindown = 1;
+ wmesg = "vmidle";
}
+
+ t = ticks;
+ vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+ msleep_spin(vcpu, &vcpu->mtx, wmesg, 0);
vcpu_require_state_locked(vcpu, VCPU_FROZEN);
vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
}
+
+ if (vcpu_halted)
+ CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus);
+
vcpu_unlock(vcpu);
- /*
- * Since 'vm_deactivate_cpu()' grabs a sleep mutex we must call it
- * outside the confines of the vcpu spinlock.
- */
- if (spindown) {
- *retu = true;
- vmexit = vm_exitinfo(vm, vcpuid);
- vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
- vm_deactivate_cpu(vm, vcpuid);
- VCPU_CTR0(vm, vcpuid, "spinning down cpu");
- }
+ if (vm_halted)
+ vm_suspend(vm, VM_SUSPEND_HALT);
return (0);
}
@@ -1152,6 +1182,100 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
return (error);
}
+static int
+vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
+{
+ int i, done;
+ struct vcpu *vcpu;
+
+ done = 0;
+ vcpu = &vm->vcpu[vcpuid];
+
+ CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus);
+
+ /*
+ * Wait until all 'active_cpus' have suspended themselves.
+ *
+ * Since a VM may be suspended at any time including when one or
+ * more vcpus are doing a rendezvous we need to call the rendezvous
+ * handler while we are waiting to prevent a deadlock.
+ */
+ vcpu_lock(vcpu);
+ while (1) {
+ if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
+ VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
+ break;
+ }
+
+ if (vm->rendezvous_func == NULL) {
+ VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
+ vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+ msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
+ vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+ } else {
+ VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
+ vcpu_unlock(vcpu);
+ vm_handle_rendezvous(vm, vcpuid);
+ vcpu_lock(vcpu);
+ }
+ }
+ vcpu_unlock(vcpu);
+
+ /*
+ * Wakeup the other sleeping vcpus and return to userspace.
+ */
+ for (i = 0; i < VM_MAXCPU; i++) {
+ if (CPU_ISSET(i, &vm->suspended_cpus)) {
+ vcpu_notify_event(vm, i, false);
+ }
+ }
+
+ *retu = true;
+ return (0);
+}
+
+int
+vm_suspend(struct vm *vm, enum vm_suspend_how how)
+{
+ int i;
+
+ if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
+ return (EINVAL);
+
+ if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
+ VM_CTR2(vm, "virtual machine already suspended %d/%d",
+ vm->suspend, how);
+ return (EALREADY);
+ }
+
+ VM_CTR1(vm, "virtual machine successfully suspended %d", how);
+
+ /*
+ * Notify all active vcpus that they are now suspended.
+ */
+ for (i = 0; i < VM_MAXCPU; i++) {
+ if (CPU_ISSET(i, &vm->active_cpus))
+ vcpu_notify_event(vm, i, false);
+ }
+
+ return (0);
+}
+
+void
+vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
+{
+ struct vm_exit *vmexit;
+
+ KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
+ ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
+
+ vmexit = vm_exitinfo(vm, vcpuid);
+ vmexit->rip = rip;
+ vmexit->inst_length = 0;
+ vmexit->exitcode = VM_EXITCODE_SUSPENDED;
+ vmexit->u.suspended.how = vm->suspend;
+}
+
int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
@@ -1162,12 +1286,15 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
struct vm_exit *vme;
bool retu, intr_disabled;
pmap_t pmap;
+ void *rptr, *sptr;
vcpuid = vmrun->cpuid;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
+ rptr = &vm->rendezvous_func;
+ sptr = &vm->suspend;
pmap = vmspace_pmap(vm->vmspace);
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
@@ -1187,7 +1314,7 @@ restart:
restore_guest_fpustate(vcpu);
vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
- error = VMRUN(vm->cookie, vcpuid, rip, pmap, &vm->rendezvous_func);
+ error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr);
vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
save_guest_fpustate(vcpu);
@@ -1200,6 +1327,9 @@ restart:
if (error == 0) {
retu = false;
switch (vme->exitcode) {
+ case VM_EXITCODE_SUSPENDED:
+ error = vm_handle_suspend(vm, vcpuid, &retu);
+ break;
case VM_EXITCODE_IOAPIC_EOI:
vioapic_process_eoi(vm, vcpuid,
vme->u.ioapic_eoi.vector);
@@ -1567,30 +1697,6 @@ vm_activate_cpu(struct vm *vm, int vcpuid)
CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
}
-static void
-vm_deactivate_cpu(struct vm *vm, int vcpuid)
-{
-
- KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU,
- ("vm_deactivate_cpu: invalid vcpuid %d", vcpuid));
- KASSERT(CPU_ISSET(vcpuid, &vm->active_cpus),
- ("vm_deactivate_cpu: vcpuid %d is not active", vcpuid));
-
- VCPU_CTR0(vm, vcpuid, "deactivated");
- CPU_CLR_ATOMIC(vcpuid, &vm->active_cpus);
-
- /*
- * If a vcpu rendezvous is in progress then it could be blocked
- * on 'vcpuid' - unblock it before disappearing forever.
- */
- mtx_lock(&vm->rendezvous_mtx);
- if (vm->rendezvous_func != NULL) {
- VCPU_CTR0(vm, vcpuid, "unblock rendezvous after deactivation");
- wakeup(&vm->rendezvous_func);
- }
- mtx_unlock(&vm->rendezvous_mtx);
-}
-
cpuset_t
vm_active_cpus(struct vm *vm)
{
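
The vm_handle_hlt() rewrite above is the core of the halt detection: each vcpu that executes HLT with interrupts disabled sets its bit in 'halted_cpus', and once that set matches 'active_cpus' the VM is suspended with VM_SUSPEND_HALT. A toy userland sketch of the same bookkeeping, with plain bitmasks standing in for the kernel's cpuset_t (all names here are illustrative, not from the commit):

    #include <stdint.h>
    #include <stdio.h>

    #define	NVCPU	4

    static uint32_t active_cpus = (1u << NVCPU) - 1;	/* all vcpus active */
    static uint32_t halted_cpus;

    /* Returns 1 when the final active vcpu enters the halted state. */
    static int
    vcpu_halt(int vcpuid)
    {
    	halted_cpus |= 1u << vcpuid;		/* CPU_SET_ATOMIC analogue */
    	return (halted_cpus == active_cpus);	/* CPU_CMP analogue */
    }

    int
    main(void)
    {
    	int i;

    	for (i = 0; i < NVCPU; i++) {
    		if (vcpu_halt(i))
    			printf("vcpu %d was last: suspend(VM_SUSPEND_HALT)\n", i);
    	}
    	return (0);
    }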
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index 6defd13..3112c52 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -166,6 +166,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vm_stat_desc *statdesc;
struct vm_x2apic *x2apic;
struct vm_gpa_pte *gpapte;
+ struct vm_suspend *vmsuspend;
sc = vmmdev_lookup2(cdev);
if (sc == NULL)
@@ -240,6 +241,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
vmrun = (struct vm_run *)data;
error = vm_run(sc->vm, vmrun);
break;
+ case VM_SUSPEND:
+ vmsuspend = (struct vm_suspend *)data;
+ error = vm_suspend(sc->vm, vmsuspend->how);
+ break;
case VM_STAT_DESC: {
statdesc = (struct vm_stat_desc *)data;
error = vmm_stat_desc_copy(statdesc->index,
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 1bd6a20..e662ca3 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -113,6 +113,7 @@ struct bhyvestats {
uint64_t cpu_switch_rotate;
uint64_t cpu_switch_direct;
int io_reset;
+ int io_poweroff;
} stats;
struct mt_vmm_info {
@@ -271,13 +272,6 @@ fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
}
static int
-vmexit_catch_reset(void)
-{
- stats.io_reset++;
- return (VMEXIT_RESET);
-}
-
-static int
vmexit_catch_inout(void)
{
return (VMEXIT_ABORT);
@@ -327,8 +321,10 @@ vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
case INOUT_OK:
return (VMEXIT_CONTINUE);
case INOUT_RESET:
+ stats.io_reset++;
return (VMEXIT_RESET);
case INOUT_POWEROFF:
+ stats.io_poweroff++;
return (VMEXIT_POWEROFF);
default:
fprintf(stderr, "Unhandled %s%c 0x%04x\n",
@@ -399,17 +395,6 @@ vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
}
static int
-vmexit_spindown_cpu(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
-{
- int lastcpu;
-
- lastcpu = fbsdrun_deletecpu(ctx, *pvcpu);
- if (!lastcpu)
- pthread_exit(NULL);
- return (vmexit_catch_reset());
-}
-
-static int
vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
@@ -493,6 +478,45 @@ vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
return (VMEXIT_CONTINUE);
}
+static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER;
+
+static int
+vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+ enum vm_suspend_how how;
+
+ how = vmexit->u.suspended.how;
+
+ fbsdrun_deletecpu(ctx, *pvcpu);
+
+ if (*pvcpu != BSP) {
+ pthread_mutex_lock(&resetcpu_mtx);
+ pthread_cond_signal(&resetcpu_cond);
+ pthread_mutex_unlock(&resetcpu_mtx);
+ pthread_exit(NULL);
+ }
+
+ pthread_mutex_lock(&resetcpu_mtx);
+ while (!CPU_EMPTY(&cpumask)) {
+ pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx);
+ }
+ pthread_mutex_unlock(&resetcpu_mtx);
+
+ switch (how) {
+ case VM_SUSPEND_RESET:
+ exit(0);
+ case VM_SUSPEND_POWEROFF:
+ exit(1);
+ case VM_SUSPEND_HALT:
+ exit(2);
+ default:
+ fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how);
+ exit(100);
+ }
+ return (0); /* NOTREACHED */
+}
+
static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_INOUT] = vmexit_inout,
[VM_EXITCODE_VMX] = vmexit_vmx,
@@ -502,7 +526,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_MTRAP] = vmexit_mtrap,
[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
- [VM_EXITCODE_SPINDOWN_CPU] = vmexit_spindown_cpu,
+ [VM_EXITCODE_SUSPENDED] = vmexit_suspend
};
static void
@@ -510,6 +534,7 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
{
int error, rc, prevcpu;
enum vm_exitcode exitcode;
+ enum vm_suspend_how how;
if (vcpumap[vcpu] != NULL) {
error = pthread_setaffinity_np(pthread_self(),
@@ -541,7 +566,15 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
rip = vmexit[vcpu].rip;
break;
case VMEXIT_RESET:
- exit(0);
+ case VMEXIT_POWEROFF:
+ if (rc == VMEXIT_RESET)
+ how = VM_SUSPEND_RESET;
+ else
+ how = VM_SUSPEND_POWEROFF;
+ error = vm_suspend(ctx, how);
+ assert(error == 0 || errno == EALREADY);
+ rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length;
+ break;
default:
exit(1);
}
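
The new vmexit_suspend() handler above gives bhyve(8) distinct exit statuses: 0 for VM_SUSPEND_RESET, 1 for VM_SUSPEND_POWEROFF, and 2 for VM_SUSPEND_HALT. A hedged sketch of a supervisor that uses this convention to implement guest reboot, relaunching bhyve only after a reset exit (the bhyve argument list is a placeholder, not taken from the commit):

    #include <sys/types.h>
    #include <sys/wait.h>
    #include <err.h>
    #include <unistd.h>

    int
    main(void)
    {
    	pid_t pid;
    	int status;

    	for (;;) {
    		if ((pid = fork()) == -1)
    			err(1, "fork");
    		if (pid == 0) {
    			/* Placeholder args; a real guest needs -m, -s, etc. */
    			execlp("bhyve", "bhyve", "testvm", (char *)NULL);
    			err(1, "execlp");
    		}
    		if (waitpid(pid, &status, 0) == -1)
    			err(1, "waitpid");
    		/* 0 == reset: relaunch; 1 == poweroff, 2 == halt: stop. */
    		if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
    			break;
    	}
    	return (0);
    }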
diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c
index 50a71bb..57d6e38 100644
--- a/usr.sbin/bhyvectl/bhyvectl.c
+++ b/usr.sbin/bhyvectl/bhyvectl.c
@@ -191,13 +191,16 @@ usage(void)
" [--get-highmem]\n"
" [--get-gpa-pmap]\n"
" [--assert-lapic-lvt=<pin>]\n"
- " [--inject-nmi]\n",
+ " [--inject-nmi]\n"
+ " [--force-reset]\n"
+ " [--force-poweroff]\n",
progname);
exit(1);
}
static int get_stats, getcap, setcap, capval, get_gpa_pmap;
static int inject_nmi, assert_lapic_lvt;
+static int force_reset, force_poweroff;
static const char *capname;
static int create, destroy, get_lowmem, get_highmem;
static uint64_t memsize;
@@ -565,6 +568,8 @@ main(int argc, char *argv[])
{ "create", NO_ARG, &create, 1 },
{ "destroy", NO_ARG, &destroy, 1 },
{ "inject-nmi", NO_ARG, &inject_nmi, 1 },
+ { "force-reset", NO_ARG, &force_reset, 1 },
+ { "force-poweroff", NO_ARG, &force_poweroff, 1 },
{ NULL, 0, NULL, 0 }
};
@@ -1534,6 +1539,12 @@ main(int argc, char *argv[])
printf("vm_run error %d\n", error);
}
+ if (!error && force_reset)
+ error = vm_suspend(ctx, VM_SUSPEND_RESET);
+
+ if (!error && force_poweroff)
+ error = vm_suspend(ctx, VM_SUSPEND_POWEROFF);
+
if (error)
printf("errno = %d\n", errno);