-rw-r--r--   lib/libvmmapi/vmmapi.c        37
-rw-r--r--   lib/libvmmapi/vmmapi.h        13
-rw-r--r--   sys/amd64/include/vmm_dev.h    4
-rw-r--r--   sys/amd64/vmm/intel/vmx.c      1
-rw-r--r--   sys/amd64/vmm/vmm.c           73
-rw-r--r--   sys/amd64/vmm/vmm_dev.c        4
-rw-r--r--   usr.sbin/bhyve/bhyverun.c     45
-rw-r--r--   usr.sbin/bhyve/bhyverun.h      5
-rw-r--r--   usr.sbin/bhyve/inout.c        20
-rw-r--r--   usr.sbin/bhyve/task_switch.c  16
-rw-r--r--   usr.sbin/bhyvectl/bhyvectl.c   5
11 files changed, 125 insertions(+), 98 deletions(-)
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
index bca1174..5660f3b 100644
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -368,14 +368,13 @@ vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
}
int
-vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit)
+vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit)
{
int error;
struct vm_run vmrun;
bzero(&vmrun, sizeof(vmrun));
vmrun.cpuid = vcpu;
- vmrun.rip = rip;
error = ioctl(ctx->fd, VM_RUN, &vmrun);
bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
@@ -399,36 +398,22 @@ vm_reinit(struct vmctx *ctx)
return (ioctl(ctx->fd, VM_REINIT, 0));
}
-static int
-vm_inject_exception_real(struct vmctx *ctx, int vcpu, int vector,
- int error_code, int error_code_valid)
+int
+vm_inject_exception(struct vmctx *ctx, int vcpu, int vector, int errcode_valid,
+ uint32_t errcode, int restart_instruction)
{
struct vm_exception exc;
- bzero(&exc, sizeof(exc));
exc.cpuid = vcpu;
exc.vector = vector;
- exc.error_code = error_code;
- exc.error_code_valid = error_code_valid;
+ exc.error_code = errcode;
+ exc.error_code_valid = errcode_valid;
+ exc.restart_instruction = restart_instruction;
return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc));
}
int
-vm_inject_exception(struct vmctx *ctx, int vcpu, int vector)
-{
-
- return (vm_inject_exception_real(ctx, vcpu, vector, 0, 0));
-}
-
-int
-vm_inject_exception2(struct vmctx *ctx, int vcpu, int vector, int errcode)
-{
-
- return (vm_inject_exception_real(ctx, vcpu, vector, errcode, 1));
-}
-
-int
vm_apicid2vcpu(struct vmctx *ctx, int apicid)
{
/*
@@ -1198,3 +1183,11 @@ vm_rtc_gettime(struct vmctx *ctx, time_t *secs)
*secs = rtctime.secs;
return (error);
}
+
+int
+vm_restart_instruction(void *arg, int vcpu)
+{
+ struct vmctx *ctx = arg;
+
+ return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu));
+}
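The libvmmapi changes above collapse vm_inject_exception()/vm_inject_exception2() into a single entry point that also carries the restart request. A minimal sketch of injecting #GP(0) with the new call, assuming only the signatures shown in this diff (vector 13 is standard x86 #GP, spelled out here rather than pulled from a header):

#include <assert.h>
#include <machine/vmm.h>
#include <vmmapi.h>

/*
 * Sketch: inject #GP(0) into a vcpu and have vmm re-execute the
 * faulting instruction.  The restart request now rides along in the
 * same ioctl instead of the caller zeroing 'inst_length' in its
 * cached struct vm_exit.
 */
static void
inject_gp_and_restart(struct vmctx *ctx, int vcpu)
{
	int error;
	int vector = 13;	/* #GP */

	/* errcode_valid = 1, errcode = 0, restart_instruction = 1 */
	error = vm_inject_exception(ctx, vcpu, vector, 1, 0, 1);
	assert(error == 0);
}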
diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
index b134431..8634762 100644
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -32,6 +32,12 @@
#include <sys/param.h>
#include <sys/cpuset.h>
+/*
+ * API version for out-of-tree consumers like grub-bhyve for making compile
+ * time decisions.
+ */
+#define VMMAPI_VERSION 0101 /* 2 digit major followed by 2 digit minor */
+
struct iovec;
struct vmctx;
enum x2apic_state;
@@ -70,13 +76,12 @@ int vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg,
struct seg_desc *seg_desc);
int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
-int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip,
- struct vm_exit *ret_vmexit);
+int vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *ret_vmexit);
int vm_suspend(struct vmctx *ctx, enum vm_suspend_how how);
int vm_reinit(struct vmctx *ctx);
int vm_apicid2vcpu(struct vmctx *ctx, int apicid);
-int vm_inject_exception(struct vmctx *ctx, int vcpu, int vec);
-int vm_inject_exception2(struct vmctx *ctx, int vcpu, int vec, int errcode);
+int vm_inject_exception(struct vmctx *ctx, int vcpu, int vector,
+ int errcode_valid, uint32_t errcode, int restart_instruction);
int vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector);
int vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector);
int vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg);
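Since VMMAPI_VERSION first appears with this change, out-of-tree code such as grub-bhyve can key compile-time decisions off its mere presence. A hypothetical shim bridging the old and new vm_run() prototypes (the RUN_VCPU macro name is invented for illustration):

#include <machine/vmm.h>
#include <vmmapi.h>

/*
 * Hypothetical compatibility shim.  Note that 0101 is an octal
 * literal as a C token ("2 digit major followed by 2 digit minor"),
 * which is harmless as long as comparisons use the same notation.
 */
#ifdef VMMAPI_VERSION
/* New API: prime %rip once, then vm_run() without a rip argument. */
#define	RUN_VCPU(ctx, vcpu, rip, vmexit)				\
	(vm_set_register((ctx), (vcpu), VM_REG_GUEST_RIP, (rip)) != 0 ?	\
	    -1 : vm_run((ctx), (vcpu), (vmexit)))
#else
/* Old API: vm_run() took the start %rip directly. */
#define	RUN_VCPU(ctx, vcpu, rip, vmexit)				\
	vm_run((ctx), (vcpu), (rip), (vmexit))
#endif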
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
index f3354e3..9d031a9 100644
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -54,7 +54,6 @@ struct vm_seg_desc { /* data or code segment */
struct vm_run {
int cpuid;
- uint64_t rip; /* start running here */
struct vm_exit vm_exit;
};
@@ -238,6 +237,7 @@ enum {
IOCNUM_LAPIC_MSI = 36,
IOCNUM_LAPIC_LOCAL_IRQ = 37,
IOCNUM_IOAPIC_PINCOUNT = 38,
+ IOCNUM_RESTART_INSTRUCTION = 39,
/* PCI pass-thru */
IOCNUM_BIND_PPTDEV = 40,
@@ -360,4 +360,6 @@ enum {
_IOW('v', IOCNUM_RTC_SETTIME, struct vm_rtc_time)
#define VM_RTC_GETTIME \
_IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time)
+#define VM_RESTART_INSTRUCTION \
+ _IOW('v', IOCNUM_RESTART_INSTRUCTION, int)
#endif
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index a10a591..c83a8b2 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -2412,6 +2412,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
if (vm_mem_allocated(vmx->vm, gpa) ||
apic_access_fault(vmx, vcpu, gpa)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
+ vmexit->inst_length = 0;
vmexit->u.paging.gpa = gpa;
vmexit->u.paging.fault_type = ept_fault_type(qual);
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1);
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index ff32b33..615a639 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -109,6 +109,7 @@ struct vcpu {
uint64_t guest_xcr0; /* (i) guest %xcr0 register */
void *stats; /* (a,i) statistics */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
+ uint64_t nextrip; /* (x) next instruction to execute */
};
#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
@@ -850,16 +851,26 @@ vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
}
int
-vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
+vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
{
+ struct vcpu *vcpu;
+ int error;
- if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
if (reg >= VM_REG_LAST)
return (EINVAL);
- return (VMSETREG(vm->cookie, vcpu, reg, val));
+ error = VMSETREG(vm->cookie, vcpuid, reg, val);
+ if (error || reg != VM_REG_GUEST_RIP)
+ return (error);
+
+ /* Set 'nextrip' to match the value of %rip */
+ VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val);
+ vcpu = &vm->vcpu[vcpuid];
+ vcpu->nextrip = val;
+ return (0);
}
static boolean_t
@@ -1199,6 +1210,9 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
+ KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d",
+ __func__, vme->inst_length));
+
ftype = vme->u.paging.fault_type;
KASSERT(ftype == VM_PROT_READ ||
ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
@@ -1224,9 +1238,6 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
if (rv != KERN_SUCCESS)
return (EFAULT);
done:
- /* restart execution at the faulting instruction */
- vm_restart_instruction(vm, vcpuid);
-
return (0);
}
@@ -1281,10 +1292,13 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
return (EFAULT);
/*
- * If the instruction length is not specified the update it now.
+ * If the instruction length was not specified then update it now
+ * along with 'nextrip'.
*/
- if (vme->inst_length == 0)
+ if (vme->inst_length == 0) {
vme->inst_length = vie->num_processed;
+ vcpu->nextrip += vie->num_processed;
+ }
/* return to userland unless this is an in-kernel emulated device */
if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
@@ -1433,7 +1447,7 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
int error, vcpuid;
struct vcpu *vcpu;
struct pcb *pcb;
- uint64_t tscval, rip;
+ uint64_t tscval;
struct vm_exit *vme;
bool retu, intr_disabled;
pmap_t pmap;
@@ -1455,7 +1469,6 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
pmap = vmspace_pmap(vm->vmspace);
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
- rip = vmrun->rip;
restart:
critical_enter();
@@ -1470,7 +1483,7 @@ restart:
restore_guest_fpustate(vcpu);
vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
- error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr);
+ error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, rptr, sptr);
vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
save_guest_fpustate(vcpu);
@@ -1481,6 +1494,7 @@ restart:
if (error == 0) {
retu = false;
+ vcpu->nextrip = vme->rip + vme->inst_length;
switch (vme->exitcode) {
case VM_EXITCODE_SUSPENDED:
error = vm_handle_suspend(vm, vcpuid, &retu);
@@ -1517,10 +1531,8 @@ restart:
}
}
- if (error == 0 && retu == false) {
- rip = vme->rip + vme->inst_length;
+ if (error == 0 && retu == false)
goto restart;
- }
/* copy the exit information */
bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
@@ -1530,14 +1542,43 @@ restart:
int
vm_restart_instruction(void *arg, int vcpuid)
{
+ struct vm *vm;
struct vcpu *vcpu;
- struct vm *vm = arg;
+ enum vcpu_state state;
+ uint64_t rip;
+ int error;
+ vm = arg;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
vcpu = &vm->vcpu[vcpuid];
- vcpu->exitinfo.inst_length = 0;
+ state = vcpu_get_state(vm, vcpuid, NULL);
+ if (state == VCPU_RUNNING) {
+ /*
+ * When a vcpu is "running" the next instruction is determined
+ * by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'.
+ * Thus setting 'inst_length' to zero will cause the current
+ * instruction to be restarted.
+ */
+ vcpu->exitinfo.inst_length = 0;
+ VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by "
+ "setting inst_length to zero", vcpu->exitinfo.rip);
+ } else if (state == VCPU_FROZEN) {
+ /*
+ * When a vcpu is "frozen" it is outside the critical section
+ * around VMRUN() and 'nextrip' points to the next instruction.
+ * Thus instruction restart is achieved by setting 'nextrip'
+ * to the vcpu's %rip.
+ */
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip);
+ KASSERT(!error, ("%s: error %d getting rip", __func__, error));
+ VCPU_CTR2(vm, vcpuid, "restarting instruction by updating "
+ "nextrip from %#lx to %#lx", vcpu->nextrip, rip);
+ vcpu->nextrip = rip;
+ } else {
+ panic("%s: invalid state %d", __func__, state);
+ }
return (0);
}
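The 'nextrip' bookkeeping introduced above has two moving parts: every exit records rip + inst_length as the resume point, and vm_restart_instruction() undoes that in a state-dependent way. A standalone model of that logic, illustrative only (field names mirror the diff, but nothing here is kernel code):

#include <stdint.h>
#include <stdio.h>

/* Illustrative model of the vcpu fields touched by this change. */
struct model_vcpu {
	uint64_t rip;		/* %rip reported by the last exit */
	int	 inst_length;	/* length of the exiting instruction */
	uint64_t nextrip;	/* where the next VMRUN() resumes */
	int	 running;	/* 1: inside vm_run(), 0: frozen */
};

/* On each exit, vm_run() computes the default resume point. */
static void
model_exit(struct model_vcpu *v, uint64_t rip, int inst_length)
{
	v->rip = rip;
	v->inst_length = inst_length;
	v->nextrip = rip + inst_length;
}

/* vm_restart_instruction(): the two cases from the diff. */
static void
model_restart(struct model_vcpu *v)
{
	if (v->running) {
		/*
		 * Running: zero inst_length so the exit path's
		 * rip + inst_length computation lands back on rip.
		 */
		v->inst_length = 0;
		v->nextrip = v->rip + v->inst_length;
	} else {
		/* Frozen: point nextrip back at the current %rip. */
		v->nextrip = v->rip;
	}
}

int
main(void)
{
	struct model_vcpu v = { .running = 1 };

	model_exit(&v, 0x1000, 3);	/* e.g. a 3-byte OUT exits */
	printf("resume at %#jx\n", (uintmax_t)v.nextrip);	/* 0x1003 */
	model_restart(&v);
	printf("after restart %#jx\n", (uintmax_t)v.nextrip);	/* 0x1000 */
	return (0);
}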
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index 9b39f14..0293d191 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -205,6 +205,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
case VM_ACTIVATE_CPU:
case VM_SET_INTINFO:
case VM_GET_INTINFO:
+ case VM_RESTART_INSTRUCTION:
/*
* XXX fragile, handle with care
* Assumes that the first field of the ioctl data is the vcpu.
@@ -506,6 +507,9 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
rtctime = (struct vm_rtc_time *)data;
rtctime->secs = vrtc_get_time(sc->vm);
break;
+ case VM_RESTART_INSTRUCTION:
+ error = vm_restart_instruction(sc->vm, vcpu);
+ break;
default:
error = ENOTTY;
break;
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 5971993..347e436 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -185,20 +185,14 @@ vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid,
int errcode)
{
struct vmctx *ctx;
- int error;
+ int error, restart_instruction;
ctx = arg;
- if (errcode_valid)
- error = vm_inject_exception2(ctx, vcpu, vector, errcode);
- else
- error = vm_inject_exception(ctx, vcpu, vector);
- assert(error == 0);
+ restart_instruction = 1;
- /*
- * Set the instruction length to 0 to ensure that the instruction is
- * restarted when the fault handler returns.
- */
- vmexit[vcpu].inst_length = 0;
+ error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode,
+ restart_instruction);
+ assert(error == 0);
}
void *
@@ -329,12 +323,6 @@ vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
}
error = emulate_inout(ctx, vcpu, vme, strictio);
- if (!error && in && !string) {
- error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX,
- vme->u.inout.eax);
- assert(error == 0);
- }
-
if (error) {
fprintf(stderr, "Unhandled %s%c 0x%04x\n", in ? "in" : "out",
bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
@@ -358,7 +346,7 @@ vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
vme->u.msr.code, *pvcpu);
if (strictmsr) {
vm_inject_gp(ctx, *pvcpu);
- return (VMEXIT_RESTART);
+ return (VMEXIT_CONTINUE);
}
}
@@ -384,7 +372,7 @@ vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
vme->u.msr.code, vme->u.msr.wval, *pvcpu);
if (strictmsr) {
vm_inject_gp(ctx, *pvcpu);
- return (VMEXIT_RESTART);
+ return (VMEXIT_CONTINUE);
}
}
return (VMEXIT_CONTINUE);
@@ -462,9 +450,11 @@ static int
vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
+ assert(vmexit->inst_length == 0);
+
stats.vmexit_bogus++;
- return (VMEXIT_RESTART);
+ return (VMEXIT_CONTINUE);
}
static int
@@ -494,9 +484,11 @@ static int
vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
+ assert(vmexit->inst_length == 0);
+
stats.vmexit_mtrap++;
- return (VMEXIT_RESTART);
+ return (VMEXIT_CONTINUE);
}
static int
@@ -581,7 +573,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
};
static void
-vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
+vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
{
int error, rc, prevcpu;
enum vm_exitcode exitcode;
@@ -596,8 +588,11 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
error = vm_active_cpus(ctx, &active_cpus);
assert(CPU_ISSET(vcpu, &active_cpus));
+ error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip);
+ assert(error == 0);
+
while (1) {
- error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]);
+ error = vm_run(ctx, vcpu, &vmexit[vcpu]);
if (error != 0)
break;
@@ -614,10 +609,6 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
switch (rc) {
case VMEXIT_CONTINUE:
- rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length;
- break;
- case VMEXIT_RESTART:
- rip = vmexit[vcpu].rip;
break;
case VMEXIT_ABORT:
abort();
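With the rip parameter gone from vm_run(), a caller primes %rip exactly once and the kernel's 'nextrip' tracking takes over, as the vm_loop() change above shows. A stripped-down sketch of the new-style loop (exit dispatch elided; error handling reduced to a break):

#include <stdint.h>
#include <machine/vmm.h>
#include <vmmapi.h>

/*
 * Minimal sketch of the new-style vcpu loop.  Handlers that need the
 * current instruction re-executed now call vm_restart_instruction()
 * instead of returning a VMEXIT_RESTART code to the loop.
 */
static void
simple_vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
{
	struct vm_exit vmexit;
	int error;

	error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip);
	if (error != 0)
		return;

	for (;;) {
		error = vm_run(ctx, vcpu, &vmexit);
		if (error != 0)
			break;
		/* ... dispatch on vmexit.exitcode here ... */
	}
}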
diff --git a/usr.sbin/bhyve/bhyverun.h b/usr.sbin/bhyve/bhyverun.h
index 87824ef..c51bf48 100644
--- a/usr.sbin/bhyve/bhyverun.h
+++ b/usr.sbin/bhyve/bhyverun.h
@@ -35,9 +35,8 @@
#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1]
#endif
-#define VMEXIT_CONTINUE 1 /* continue from next instruction */
-#define VMEXIT_RESTART 2 /* restart current instruction */
-#define VMEXIT_ABORT 3 /* abort the vm run loop */
+#define VMEXIT_CONTINUE (0)
+#define VMEXIT_ABORT (-1)
struct vmctx;
extern int guest_ncpus;
diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c
index 1041a59..402b953 100644
--- a/usr.sbin/bhyve/inout.c
+++ b/usr.sbin/bhyve/inout.c
@@ -104,7 +104,7 @@ int
emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
{
int addrsize, bytes, flags, in, port, prot, rep;
- uint32_t val;
+ uint32_t eax, val;
inout_func_t handler;
void *arg;
int error, retval;
@@ -214,16 +214,20 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
}
/* Restart the instruction if more iterations remain */
- if (retval == 0 && count != 0)
- vmexit->inst_length = 0;
- } else {
- if (!in) {
- val = vmexit->u.inout.eax & vie_size2mask(bytes);
+ if (retval == 0 && count != 0) {
+ error = vm_restart_instruction(ctx, vcpu);
+ assert(error == 0);
}
+ } else {
+ eax = vmexit->u.inout.eax;
+ val = eax & vie_size2mask(bytes);
retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
if (retval == 0 && in) {
- vmexit->u.inout.eax &= ~vie_size2mask(bytes);
- vmexit->u.inout.eax |= val & vie_size2mask(bytes);
+ eax &= ~vie_size2mask(bytes);
+ eax |= val & vie_size2mask(bytes);
+ error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX,
+ eax);
+ assert(error == 0);
}
}
return (retval);
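The non-string in/out path above now merges only the accessed low bytes of the handler's result back into %eax before committing it with vm_set_register(). A self-contained sketch of that mask-and-merge, using a local size2mask() as a stand-in for vie_size2mask() (assumed here to return the low-byte mask for a 1-, 2-, or 4-byte access):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for vie_size2mask(): mask covering the low 'bytes' bytes. */
static uint32_t
size2mask(int bytes)
{
	return (bytes == 1 ? 0xff : bytes == 2 ? 0xffff : 0xffffffff);
}

int
main(void)
{
	uint32_t eax = 0xdeadbeef;	/* guest %eax before a 2-byte IN */
	uint32_t val = 0x1234;		/* value produced by the handler */
	int bytes = 2;

	/* Replace only the low 'bytes' of %eax; the rest is preserved. */
	eax &= ~size2mask(bytes);
	eax |= val & size2mask(bytes);
	printf("%%eax = %#x\n", eax);	/* prints 0xdead1234 */
	return (0);
}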
diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c
index b939c1a..ba6a9d2 100644
--- a/usr.sbin/bhyve/task_switch.c
+++ b/usr.sbin/bhyve/task_switch.c
@@ -725,21 +725,11 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
assert(paging->cpu_mode == CPU_MODE_PROTECTED);
/*
- * Calculate the %eip to store in the old TSS before modifying the
- * 'inst_length'.
+ * Calculate the instruction pointer to store in the old TSS.
*/
eip = vmexit->rip + vmexit->inst_length;
/*
- * Set the 'inst_length' to '0'.
- *
- * If an exception is triggered during emulation of the task switch
- * then the exception handler should return to the instruction that
- * caused the task switch as opposed to the subsequent instruction.
- */
- vmexit->inst_length = 0;
-
- /*
* Section 4.6, "Access Rights" in Intel SDM Vol 3.
* The following page table accesses are implicitly supervisor mode:
* - accesses to GDT or LDT to load segment descriptors
@@ -883,8 +873,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
* after this point will be handled in the context of the new task and
* the saved instruction pointer will belong to the new task.
*/
- vmexit->rip = newtss.tss_eip;
- assert(vmexit->inst_length == 0);
+ error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, newtss.tss_eip);
+ assert(error == 0);
/* Load processor state from new TSS */
error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov);
diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c
index 170ca21..e2b514d 100644
--- a/usr.sbin/bhyvectl/bhyvectl.c
+++ b/usr.sbin/bhyvectl/bhyvectl.c
@@ -2118,10 +2118,7 @@ main(int argc, char *argv[])
}
if (!error && run) {
- error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip);
- assert(error == 0);
-
- error = vm_run(ctx, vcpu, rip, &vmexit);
+ error = vm_run(ctx, vcpu, &vmexit);
if (error == 0)
dump_vm_run_exitcode(&vmexit, vcpu);
else