From d9f07f98412532f2077e8a8503651536223931ca Mon Sep 17 00:00:00 2001 From: neel Date: Sun, 18 Jan 2015 03:08:30 +0000 Subject: Simplify instruction restart logic in bhyve. Keep track of the next instruction to be executed by the vcpu as 'nextrip'. As a result the VM_RUN ioctl no longer takes the %rip where a vcpu should start execution. Also, instruction restart happens implicitly via 'vm_inject_exception()' or explicitly via 'vm_restart_instruction()'. The APIs behave identically in both kernel and userspace contexts. The main beneficiary is the instruction emulation code that executes in both contexts. bhyve(8) VM exit handlers now treat 'vmexit->rip' and 'vmexit->inst_length' as readonly: - Restarting an instruction is now done by calling 'vm_restart_instruction()' as opposed to setting 'vmexit->inst_length' to 0 (e.g. emulate_inout()) - Resuming vcpu at an arbitrary %rip is now done by setting VM_REG_GUEST_RIP as opposed to changing 'vmexit->rip' (e.g. vmexit_task_switch()) Differential Revision: https://reviews.freebsd.org/D1526 Reviewed by: grehan MFC after: 2 weeks --- sys/amd64/vmm/intel/vmx.c | 1 + sys/amd64/vmm/vmm.c | 73 ++++++++++++++++++++++++++++++++++++----------- sys/amd64/vmm/vmm_dev.c | 4 +++ 3 files changed, 62 insertions(+), 16 deletions(-) (limited to 'sys/amd64/vmm') diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index a10a591..c83a8b2 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -2412,6 +2412,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) if (vm_mem_allocated(vmx->vm, gpa) || apic_access_fault(vmx, vcpu, gpa)) { vmexit->exitcode = VM_EXITCODE_PAGING; + vmexit->inst_length = 0; vmexit->u.paging.gpa = gpa; vmexit->u.paging.fault_type = ept_fault_type(qual); vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index ff32b33..615a639 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -109,6 +109,7 @@ struct vcpu { uint64_t guest_xcr0; /* (i) guest %xcr0 register */ void *stats; /* (a,i) statistics */ struct vm_exit exitinfo; /* (x) exit reason and collateral */ + uint64_t nextrip; /* (x) next instruction to execute */ }; #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) @@ -850,16 +851,26 @@ vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) } int -vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val) +vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) { + struct vcpu *vcpu; + int error; - if (vcpu < 0 || vcpu >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); if (reg >= VM_REG_LAST) return (EINVAL); - return (VMSETREG(vm->cookie, vcpu, reg, val)); + error = VMSETREG(vm->cookie, vcpuid, reg, val); + if (error || reg != VM_REG_GUEST_RIP) + return (error); + + /* Set 'nextrip' to match the value of %rip */ + VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val); + vcpu = &vm->vcpu[vcpuid]; + vcpu->nextrip = val; + return (0); } static boolean_t @@ -1199,6 +1210,9 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; + KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", + __func__, vme->inst_length)); + ftype = vme->u.paging.fault_type; KASSERT(ftype == VM_PROT_READ || ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE, @@ -1224,9 +1238,6 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) if (rv != KERN_SUCCESS) return (EFAULT); done: - /* restart execution at the faulting instruction */ - vm_restart_instruction(vm, vcpuid); - return (0); } @@ -1281,10 +1292,13 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) return (EFAULT); /* - * If the instruction length is not specified the update it now. + * If the instruction length was not specified then update it now + * along with 'nextrip'. */ - if (vme->inst_length == 0) + if (vme->inst_length == 0) { vme->inst_length = vie->num_processed; + vcpu->nextrip += vie->num_processed; + } /* return to userland unless this is an in-kernel emulated device */ if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { @@ -1433,7 +1447,7 @@ vm_run(struct vm *vm, struct vm_run *vmrun) int error, vcpuid; struct vcpu *vcpu; struct pcb *pcb; - uint64_t tscval, rip; + uint64_t tscval; struct vm_exit *vme; bool retu, intr_disabled; pmap_t pmap; @@ -1455,7 +1469,6 @@ vm_run(struct vm *vm, struct vm_run *vmrun) pmap = vmspace_pmap(vm->vmspace); vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; - rip = vmrun->rip; restart: critical_enter(); @@ -1470,7 +1483,7 @@ restart: restore_guest_fpustate(vcpu); vcpu_require_state(vm, vcpuid, VCPU_RUNNING); - error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr); + error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, rptr, sptr); vcpu_require_state(vm, vcpuid, VCPU_FROZEN); save_guest_fpustate(vcpu); @@ -1481,6 +1494,7 @@ restart: if (error == 0) { retu = false; + vcpu->nextrip = vme->rip + vme->inst_length; switch (vme->exitcode) { case VM_EXITCODE_SUSPENDED: error = vm_handle_suspend(vm, vcpuid, &retu); @@ -1517,10 +1531,8 @@ restart: } } - if (error == 0 && retu == false) { - rip = vme->rip + vme->inst_length; + if (error == 0 && retu == false) goto restart; - } /* copy the exit information */ bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); @@ -1530,14 +1542,43 @@ restart: int vm_restart_instruction(void *arg, int vcpuid) { + struct vm *vm; struct vcpu *vcpu; - struct vm *vm = arg; + enum vcpu_state state; + uint64_t rip; + int error; + vm = arg; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); vcpu = &vm->vcpu[vcpuid]; - vcpu->exitinfo.inst_length = 0; + state = vcpu_get_state(vm, vcpuid, NULL); + if (state == VCPU_RUNNING) { + /* + * When a vcpu is "running" the next instruction is determined + * by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'. + * Thus setting 'inst_length' to zero will cause the current + * instruction to be restarted. + */ + vcpu->exitinfo.inst_length = 0; + VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by " + "setting inst_length to zero", vcpu->exitinfo.rip); + } else if (state == VCPU_FROZEN) { + /* + * When a vcpu is "frozen" it is outside the critical section + * around VMRUN() and 'nextrip' points to the next instruction. + * Thus instruction restart is achieved by setting 'nextrip' + * to the vcpu's %rip. + */ + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip); + KASSERT(!error, ("%s: error %d getting rip", __func__, error)); + VCPU_CTR2(vm, vcpuid, "restarting instruction by updating " + "nextrip from %#lx to %#lx", vcpu->nextrip, rip); + vcpu->nextrip = rip; + } else { + panic("%s: invalid state %d", __func__, state); + } return (0); } diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index 9b39f14..0293d191 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -205,6 +205,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, case VM_ACTIVATE_CPU: case VM_SET_INTINFO: case VM_GET_INTINFO: + case VM_RESTART_INSTRUCTION: /* * XXX fragile, handle with care * Assumes that the first field of the ioctl data is the vcpu. @@ -506,6 +507,9 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, rtctime = (struct vm_rtc_time *)data; rtctime->secs = vrtc_get_time(sc->vm); break; + case VM_RESTART_INSTRUCTION: + error = vm_restart_instruction(sc->vm, vcpu); + break; default: error = ENOTTY; break; -- cgit v1.1