Diffstat (limited to 'sys/amd64/vmm/vmm.c')
-rw-r--r--  sys/amd64/vmm/vmm.c | 664
1 file changed, 490 insertions(+), 174 deletions(-)
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 5fc6b94..5c2f202 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -39,18 +39,28 @@ __FBSDID("$FreeBSD$");
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
+#include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/systm.h>
 
 #include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_param.h>
 
 #include <machine/vm.h>
 #include <machine/pcb.h>
 #include <machine/smp.h>
 #include <x86/apicreg.h>
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
 
 #include <machine/vmm.h>
+#include "vmm_ktr.h"
 #include "vmm_host.h"
 #include "vmm_mem.h"
 #include "vmm_util.h"
@@ -84,15 +94,23 @@ struct vcpu {
 #define vcpu_lock_init(v)       mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
 #define vcpu_lock(v)            mtx_lock_spin(&((v)->mtx))
 #define vcpu_unlock(v)          mtx_unlock_spin(&((v)->mtx))
+#define vcpu_assert_locked(v)   mtx_assert(&((v)->mtx), MA_OWNED)
 
+struct mem_seg {
+        vm_paddr_t      gpa;
+        size_t          len;
+        boolean_t       wired;
+        vm_object_t     object;
+};
 #define VM_MAX_MEMORY_SEGMENTS  2
 
 struct vm {
         void            *cookie;        /* processor-specific data */
         void            *iommu;         /* iommu-specific data */
+        struct vmspace  *vmspace;       /* guest's address space */
         struct vcpu     vcpu[VM_MAXCPU];
         int             num_mem_segs;
-        struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
+        struct mem_seg  mem_segs[VM_MAX_MEMORY_SEGMENTS];
         char            name[VM_MAX_NAMELEN];
 
         /*
@@ -109,16 +127,14 @@ static struct vmm_ops *ops;
 
 #define VMM_INIT()      (ops != NULL ? (*ops->init)() : 0)
 #define VMM_CLEANUP()   (ops != NULL ? (*ops->cleanup)() : 0)
-#define VMINIT(vm)      (ops != NULL ? (*ops->vminit)(vm): NULL)
-#define VMRUN(vmi, vcpu, rip) \
-        (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO)
+#define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
+#define VMRUN(vmi, vcpu, rip, pmap) \
+        (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap) : ENXIO)
 #define VMCLEANUP(vmi)  (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
-#define VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm)                 \
-        (ops != NULL ?                                                  \
-        (*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) :       \
-        ENXIO)
-#define VMMMAP_GET(vmi, gpa) \
-        (ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO)
+#define VMSPACE_ALLOC(min, max) \
+        (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
+#define VMSPACE_FREE(vmspace) \
+        (ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
 #define VMGETREG(vmi, vcpu, num, retval) \
         (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
 #define VMSETREG(vmi, vcpu, num, val) \
@@ -213,8 +229,7 @@ vmm_handler(module_t mod, int what, void *arg)
         switch (what) {
         case MOD_LOAD:
                 vmmdev_init();
-                if (ppt_num_devices() > 0)
-                        iommu_init();
+                iommu_init();
                 error = vmm_init();
                 if (error == 0)
                         vmm_initialized = 1;
@@ -265,7 +280,7 @@ vm_create(const char *name, struct vm **retvm)
 {
         int i;
         struct vm *vm;
-        vm_paddr_t maxaddr;
+        struct vmspace *vmspace;
 
         const int BSP = 0;
 
@@ -279,59 +294,34 @@ vm_create(const char *name, struct vm **retvm)
         if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
                 return (EINVAL);
 
+        vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
+        if (vmspace == NULL)
+                return (ENOMEM);
+
         vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
         strcpy(vm->name, name);
-        vm->cookie = VMINIT(vm);
+        vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
 
         for (i = 0; i < VM_MAXCPU; i++) {
                 vcpu_init(vm, i);
                 guest_msrs_init(vm, i);
         }
 
-        maxaddr = vmm_mem_maxaddr();
-        vm->iommu = iommu_create_domain(maxaddr);
         vm_activate_cpu(vm, BSP);
+        vm->vmspace = vmspace;
 
         *retvm = vm;
         return (0);
 }
 
 static void
-vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg)
+vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
 {
-        size_t len;
-        vm_paddr_t hpa;
-        void *host_domain;
-
-        host_domain = iommu_host_domain();
-
-        len = 0;
-        while (len < seg->len) {
-                hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE);
-                if (hpa == (vm_paddr_t)-1) {
-                        panic("vm_free_mem_segs: cannot free hpa "
-                              "associated with gpa 0x%016lx", seg->gpa + len);
-                }
-
-                /*
-                 * Remove the 'gpa' to 'hpa' mapping in VMs domain.
-                 * And resurrect the 1:1 mapping for 'hpa' in 'host_domain'.
-                 */
-                iommu_remove_mapping(vm->iommu, seg->gpa + len, PAGE_SIZE);
-                iommu_create_mapping(host_domain, hpa, hpa, PAGE_SIZE);
-
-                vmm_mem_free(hpa, PAGE_SIZE);
-
-                len += PAGE_SIZE;
-        }
 
-        /*
-         * Invalidate cached translations associated with 'vm->iommu' since
-         * we have now moved some pages from it.
-         */
-        iommu_invalidate_tlb(vm->iommu);
+        if (seg->object != NULL)
+                vmm_mem_free(vm->vmspace, seg->gpa, seg->len);
 
-        bzero(seg, sizeof(struct vm_memory_segment));
+        bzero(seg, sizeof(*seg));
 }
 
 void
@@ -341,6 +331,9 @@ vm_destroy(struct vm *vm)
 
         ppt_unassign_all(vm);
 
+        if (vm->iommu != NULL)
+                iommu_destroy_domain(vm->iommu);
+
         for (i = 0; i < vm->num_mem_segs; i++)
                 vm_free_mem_seg(vm, &vm->mem_segs[i]);
 
@@ -349,7 +342,7 @@ vm_destroy(struct vm *vm)
         for (i = 0; i < VM_MAXCPU; i++)
                 vcpu_cleanup(&vm->vcpu[i]);
 
-        iommu_destroy_domain(vm->iommu);
+        VMSPACE_FREE(vm->vmspace);
 
         VMCLEANUP(vm->cookie);
 
@@ -365,52 +358,48 @@ vm_name(struct vm *vm)
 int
 vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
 {
-        const boolean_t spok = TRUE;    /* superpage mappings are ok */
+        vm_object_t obj;
 
-        return (VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
-                           VM_PROT_RW, spok));
+        if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
+                return (ENOMEM);
+        else
+                return (0);
 }
 
 int
 vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
 {
-        const boolean_t spok = TRUE;    /* superpage mappings are ok */
 
-        return (VMMMAP_SET(vm->cookie, gpa, 0, len, 0,
-                           VM_PROT_NONE, spok));
+        vmm_mmio_free(vm->vmspace, gpa, len);
+        return (0);
 }
 
-/*
- * Returns TRUE if 'gpa' is available for allocation and FALSE otherwise
- */
-static boolean_t
-vm_gpa_available(struct vm *vm, vm_paddr_t gpa)
+boolean_t
+vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
 {
         int i;
         vm_paddr_t gpabase, gpalimit;
 
-        if (gpa & PAGE_MASK)
-                panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa);
-
         for (i = 0; i < vm->num_mem_segs; i++) {
                 gpabase = vm->mem_segs[i].gpa;
                 gpalimit = gpabase + vm->mem_segs[i].len;
                 if (gpa >= gpabase && gpa < gpalimit)
-                        return (FALSE);
+                        return (TRUE);          /* 'gpa' is regular memory */
         }
 
-        return (TRUE);
+        if (ppt_is_mmio(vm, gpa))
+                return (TRUE);                  /* 'gpa' is pci passthru mmio */
+
+        return (FALSE);
 }
 
 int
 vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
 {
-        int error, available, allocated;
-        struct vm_memory_segment *seg;
-        vm_paddr_t g, hpa;
-        void *host_domain;
-
-        const boolean_t spok = TRUE;    /* superpage mappings are ok */
+        int available, allocated;
+        struct mem_seg *seg;
+        vm_object_t object;
+        vm_paddr_t g;
 
         if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
                 return (EINVAL);
@@ -418,10 +407,10 @@ vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
         available = allocated = 0;
         g = gpa;
         while (g < gpa + len) {
-                if (vm_gpa_available(vm, g))
-                        available++;
-                else
+                if (vm_mem_allocated(vm, g))
                         allocated++;
+                else
+                        available++;
                 g += PAGE_SIZE;
         }
 
@@ -443,61 +432,203 @@ vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
         if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
                 return (E2BIG);
 
-        host_domain = iommu_host_domain();
-
         seg = &vm->mem_segs[vm->num_mem_segs];
-        error = 0;
+        if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
+                return (ENOMEM);
+
         seg->gpa = gpa;
-        seg->len = 0;
-        while (seg->len < len) {
-                hpa = vmm_mem_alloc(PAGE_SIZE);
-                if (hpa == 0) {
-                        error = ENOMEM;
-                        break;
-                }
+        seg->len = len;
+        seg->object = object;
+        seg->wired = FALSE;
 
-                error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE,
-                                   VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok);
-                if (error)
+        vm->num_mem_segs++;
+
+        return (0);
+}
+
+static void
+vm_gpa_unwire(struct vm *vm)
+{
+        int i, rv;
+        struct mem_seg *seg;
+
+        for (i = 0; i < vm->num_mem_segs; i++) {
+                seg = &vm->mem_segs[i];
+                if (!seg->wired)
+                        continue;
+
+                rv = vm_map_unwire(&vm->vmspace->vm_map,
+                                   seg->gpa, seg->gpa + seg->len,
+                                   VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
+                KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
+                    "%#lx/%ld could not be unwired: %d",
+                    vm_name(vm), seg->gpa, seg->len, rv));
+
+                seg->wired = FALSE;
+        }
+}
+
+static int
+vm_gpa_wire(struct vm *vm)
+{
+        int i, rv;
+        struct mem_seg *seg;
+
+        for (i = 0; i < vm->num_mem_segs; i++) {
+                seg = &vm->mem_segs[i];
+                if (seg->wired)
+                        continue;
+
+                /* XXX rlimits? */
+                rv = vm_map_wire(&vm->vmspace->vm_map,
+                                 seg->gpa, seg->gpa + seg->len,
+                                 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
+                if (rv != KERN_SUCCESS)
                         break;
 
+                seg->wired = TRUE;
+        }
+
+        if (i < vm->num_mem_segs) {
                 /*
-                 * Remove the 1:1 mapping for 'hpa' from the 'host_domain'.
-                 * Add mapping for 'gpa + seg->len' to 'hpa' in the VMs domain.
+                 * Undo the wiring before returning an error.
                  */
-                iommu_remove_mapping(host_domain, hpa, PAGE_SIZE);
-                iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE);
+                vm_gpa_unwire(vm);
+                return (EAGAIN);
+        }
+
+        return (0);
+}
+
+static void
+vm_iommu_modify(struct vm *vm, boolean_t map)
+{
+        int i, sz;
+        vm_paddr_t gpa, hpa;
+        struct mem_seg *seg;
+        void *vp, *cookie, *host_domain;
 
-                seg->len += PAGE_SIZE;
+        sz = PAGE_SIZE;
+        host_domain = iommu_host_domain();
+
+        for (i = 0; i < vm->num_mem_segs; i++) {
+                seg = &vm->mem_segs[i];
+                KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
+                    vm_name(vm), seg->gpa, seg->len));
+
+                gpa = seg->gpa;
+                while (gpa < seg->gpa + seg->len) {
+                        vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
+                                         &cookie);
+                        KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
+                            vm_name(vm), gpa));
+
+                        vm_gpa_release(cookie);
+
+                        hpa = DMAP_TO_PHYS((uintptr_t)vp);
+                        if (map) {
+                                iommu_create_mapping(vm->iommu, gpa, hpa, sz);
+                                iommu_remove_mapping(host_domain, hpa, sz);
+                        } else {
+                                iommu_remove_mapping(vm->iommu, gpa, sz);
+                                iommu_create_mapping(host_domain, hpa, hpa, sz);
+                        }
+
+                        gpa += PAGE_SIZE;
+                }
         }
 
-        if (error) {
-                vm_free_mem_seg(vm, seg);
+        /*
+         * Invalidate the cached translations associated with the domain
+         * from which pages were removed.
+         */
+        if (map)
+                iommu_invalidate_tlb(host_domain);
+        else
+                iommu_invalidate_tlb(vm->iommu);
+}
+
+#define vm_iommu_unmap(vm)      vm_iommu_modify((vm), FALSE)
+#define vm_iommu_map(vm)        vm_iommu_modify((vm), TRUE)
+
+int
+vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
+{
+        int error;
+
+        error = ppt_unassign_device(vm, bus, slot, func);
+        if (error)
                 return (error);
+
+        if (ppt_num_devices(vm) == 0) {
+                vm_iommu_unmap(vm);
+                vm_gpa_unwire(vm);
         }
+        return (0);
+}
+
+int
+vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
+{
+        int error;
+        vm_paddr_t maxaddr;
 
         /*
-         * Invalidate cached translations associated with 'host_domain' since
-         * we have now moved some pages from it.
+         * Virtual machines with pci passthru devices get special treatment:
+         * - the guest physical memory is wired
+         * - the iommu is programmed to do the 'gpa' to 'hpa' translation
+         *
+         * We need to do this before the first pci passthru device is attached.
          */
-        iommu_invalidate_tlb(host_domain);
+        if (ppt_num_devices(vm) == 0) {
+                KASSERT(vm->iommu == NULL,
+                    ("vm_assign_pptdev: iommu must be NULL"));
+                maxaddr = vmm_mem_maxaddr();
+                vm->iommu = iommu_create_domain(maxaddr);
 
-        vm->num_mem_segs++;
+                error = vm_gpa_wire(vm);
+                if (error)
+                        return (error);
 
-        return (0);
+                vm_iommu_map(vm);
+        }
+
+        error = ppt_assign_device(vm, bus, slot, func);
+        return (error);
 }
 
-vm_paddr_t
-vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len)
+void *
+vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
+            void **cookie)
 {
-        vm_paddr_t nextpage;
+        int count, pageoff;
+        vm_page_t m;
+
+        pageoff = gpa & PAGE_MASK;
+        if (len > PAGE_SIZE - pageoff)
+                panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
 
-        nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE);
-        if (len > nextpage - gpa)
-                panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len);
+        count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
+            trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
 
-        return (VMMMAP_GET(vm->cookie, gpa));
+        if (count == 1) {
+                *cookie = m;
+                return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
+        } else {
+                *cookie = NULL;
+                return (NULL);
+        }
+}
+
+void
+vm_gpa_release(void *cookie)
+{
+        vm_page_t m = cookie;
+
+        vm_page_lock(m);
+        vm_page_unhold(m);
+        vm_page_unlock(m);
 }
 
 int
@@ -508,7 +639,9 @@ vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
 
         for (i = 0; i < vm->num_mem_segs; i++) {
                 if (gpabase == vm->mem_segs[i].gpa) {
-                        *seg = vm->mem_segs[i];
+                        seg->gpa = vm->mem_segs[i].gpa;
+                        seg->len = vm->mem_segs[i].len;
+                        seg->wired = vm->mem_segs[i].wired;
                         return (0);
                 }
         }
@@ -516,6 +649,33 @@
 }
 
 int
+vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
+    vm_offset_t *offset, struct vm_object **object)
+{
+        int i;
+        size_t seg_len;
+        vm_paddr_t seg_gpa;
+        vm_object_t seg_obj;
+
+        for (i = 0; i < vm->num_mem_segs; i++) {
+                if ((seg_obj = vm->mem_segs[i].object) == NULL)
+                        continue;
+
+                seg_gpa = vm->mem_segs[i].gpa;
+                seg_len = vm->mem_segs[i].len;
+
+                if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
+                        *offset = gpa - seg_gpa;
+                        *object = seg_obj;
+                        vm_object_reference(seg_obj);
+                        return (0);
+                }
+        }
+
+        return (EINVAL);
+}
+
+int
 vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
 {
 
@@ -633,26 +793,215 @@ save_guest_fpustate(struct vcpu *vcpu)
 
 static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
 
+static int
+vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
+{
+        int error;
+
+        vcpu_assert_locked(vcpu);
+
+        /*
+         * The following state transitions are allowed:
+         * IDLE -> FROZEN -> IDLE
+         * FROZEN -> RUNNING -> FROZEN
+         * FROZEN -> SLEEPING -> FROZEN
+         */
+        switch (vcpu->state) {
+        case VCPU_IDLE:
+        case VCPU_RUNNING:
+        case VCPU_SLEEPING:
+                error = (newstate != VCPU_FROZEN);
+                break;
+        case VCPU_FROZEN:
+                error = (newstate == VCPU_FROZEN);
+                break;
+        default:
+                error = 1;
+                break;
+        }
+
+        if (error == 0)
+                vcpu->state = newstate;
+        else
+                error = EBUSY;
+
+        return (error);
+}
+
+static void
+vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
+{
+        int error;
+
+        if ((error = vcpu_set_state(vm, vcpuid, newstate)) != 0)
+                panic("Error %d setting state to %d\n", error, newstate);
+}
+
+static void
+vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
+{
+        int error;
+
+        if ((error = vcpu_set_state_locked(vcpu, newstate)) != 0)
+                panic("Error %d setting state to %d", error, newstate);
+}
+
+/*
+ * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
+ */
+static int
+vm_handle_hlt(struct vm *vm, int vcpuid, boolean_t *retu)
+{
+        struct vcpu *vcpu;
+        int sleepticks, t;
+
+        vcpu = &vm->vcpu[vcpuid];
+
+        vcpu_lock(vcpu);
+
+        /*
+         * Figure out the number of host ticks until the next apic
+         * timer interrupt in the guest.
+         */
+        sleepticks = lapic_timer_tick(vm, vcpuid);
+
+        /*
+         * If the guest local apic timer is disabled then sleep for
+         * a long time but not forever.
+         */
+        if (sleepticks < 0)
+                sleepticks = hz;
+
+        /*
+         * Do a final check for pending NMI or interrupts before
+         * really putting this thread to sleep.
+         *
+         * These interrupts could have happened any time after we
+         * returned from VMRUN() and before we grabbed the vcpu lock.
+         */
+        if (!vm_nmi_pending(vm, vcpuid) && lapic_pending_intr(vm, vcpuid) < 0) {
+                if (sleepticks <= 0)
+                        panic("invalid sleepticks %d", sleepticks);
+                t = ticks;
+                vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+                msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks);
+                vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+                vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
+        }
+        vcpu_unlock(vcpu);
+
+        return (0);
+}
+
+static int
+vm_handle_paging(struct vm *vm, int vcpuid, boolean_t *retu)
+{
+        int rv, ftype;
+        struct vm_map *map;
+        struct vcpu *vcpu;
+        struct vm_exit *vme;
+
+        vcpu = &vm->vcpu[vcpuid];
+        vme = &vcpu->exitinfo;
+
+        ftype = vme->u.paging.fault_type;
+        KASSERT(ftype == VM_PROT_READ ||
+            ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
+            ("vm_handle_paging: invalid fault_type %d", ftype));
+
+        if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
+                rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
+                    vme->u.paging.gpa, ftype);
+                if (rv == 0)
+                        goto done;
+        }
+
+        map = &vm->vmspace->vm_map;
+        rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);
+
+        VMM_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, ftype = %d",
+            rv, vme->u.paging.gpa, ftype);
+
+        if (rv != KERN_SUCCESS)
+                return (EFAULT);
+done:
+        /* restart execution at the faulting instruction */
+        vme->inst_length = 0;
+
+        return (0);
+}
+
+static int
+vm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu)
+{
+        struct vie *vie;
+        struct vcpu *vcpu;
+        struct vm_exit *vme;
+        int error, inst_length;
+        uint64_t rip, gla, gpa, cr3;
+
+        vcpu = &vm->vcpu[vcpuid];
+        vme = &vcpu->exitinfo;
+
+        rip = vme->rip;
+        inst_length = vme->inst_length;
+
+        gla = vme->u.inst_emul.gla;
+        gpa = vme->u.inst_emul.gpa;
+        cr3 = vme->u.inst_emul.cr3;
+        vie = &vme->u.inst_emul.vie;
+
+        vie_init(vie);
+
+        /* Fetch, decode and emulate the faulting instruction */
+        if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3, vie) != 0)
+                return (EFAULT);
+
+        if (vmm_decode_instruction(vm, vcpuid, gla, vie) != 0)
+                return (EFAULT);
+
+        /* return to userland unless this is a local apic access */
+        if (gpa < DEFAULT_APIC_BASE || gpa >= DEFAULT_APIC_BASE + PAGE_SIZE) {
+                *retu = TRUE;
+                return (0);
+        }
+
+        error = vmm_emulate_instruction(vm, vcpuid, gpa, vie,
+            lapic_mmio_read, lapic_mmio_write, 0);
+
+        /* return to userland to spin up the AP */
+        if (error == 0 && vme->exitcode == VM_EXITCODE_SPINUP_AP)
+                *retu = TRUE;
+
+        return (error);
+}
+
 int
 vm_run(struct vm *vm, struct vm_run *vmrun)
 {
-        int error, vcpuid, sleepticks, t;
+        int error, vcpuid;
         struct vcpu *vcpu;
         struct pcb *pcb;
         uint64_t tscval, rip;
         struct vm_exit *vme;
+        boolean_t retu;
+        pmap_t pmap;
 
         vcpuid = vmrun->cpuid;
 
         if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
                 return (EINVAL);
 
+        pmap = vmspace_pmap(vm->vmspace);
         vcpu = &vm->vcpu[vcpuid];
-        vme = &vmrun->vm_exit;
+        vme = &vcpu->exitinfo;
         rip = vmrun->rip;
 restart:
         critical_enter();
 
+        KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
+            ("vm_run: absurd pm_active"));
+
         tscval = rdtsc();
 
         pcb = PCPU_GET(curpcb);
@@ -661,62 +1010,44 @@ restart:
         restore_guest_msrs(vm, vcpuid);
         restore_guest_fpustate(vcpu);
 
+        vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
         vcpu->hostcpu = curcpu;
-        error = VMRUN(vm->cookie, vcpuid, rip);
+        error = VMRUN(vm->cookie, vcpuid, rip, pmap);
         vcpu->hostcpu = NOCPU;
+        vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
 
         save_guest_fpustate(vcpu);
         restore_host_msrs(vm, vcpuid);
 
         vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
 
-        /* copy the exit information */
-        bcopy(&vcpu->exitinfo, vme, sizeof(struct vm_exit));
-
         critical_exit();
 
-        /*
-         * Oblige the guest's desire to 'hlt' by sleeping until the vcpu
-         * is ready to run.
-         */
-        if (error == 0 && vme->exitcode == VM_EXITCODE_HLT) {
-                vcpu_lock(vcpu);
-
-                /*
-                 * Figure out the number of host ticks until the next apic
-                 * timer interrupt in the guest.
-                 */
-                sleepticks = lapic_timer_tick(vm, vcpuid);
-
-                /*
-                 * If the guest local apic timer is disabled then sleep for
-                 * a long time but not forever.
-                 */
-                if (sleepticks < 0)
-                        sleepticks = hz;
-
-                /*
-                 * Do a final check for pending NMI or interrupts before
-                 * really putting this thread to sleep.
-                 *
-                 * These interrupts could have happened any time after we
-                 * returned from VMRUN() and before we grabbed the vcpu lock.
-                 */
-                if (!vm_nmi_pending(vm, vcpuid) &&
-                    lapic_pending_intr(vm, vcpuid) < 0) {
-                        if (sleepticks <= 0)
-                                panic("invalid sleepticks %d", sleepticks);
-                        t = ticks;
-                        msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks);
-                        vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
+        if (error == 0) {
+                retu = FALSE;
+                switch (vme->exitcode) {
+                case VM_EXITCODE_HLT:
+                        error = vm_handle_hlt(vm, vcpuid, &retu);
+                        break;
+                case VM_EXITCODE_PAGING:
+                        error = vm_handle_paging(vm, vcpuid, &retu);
+                        break;
+                case VM_EXITCODE_INST_EMUL:
+                        error = vm_handle_inst_emul(vm, vcpuid, &retu);
+                        break;
+                default:
+                        retu = TRUE;    /* handled in userland */
+                        break;
                 }
+        }
 
-                vcpu_unlock(vcpu);
-
+        if (error == 0 && retu == FALSE) {
                 rip = vme->rip + vme->inst_length;
                 goto restart;
         }
 
+        /* copy the exit information */
+        bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
         return (error);
 }
 
@@ -869,7 +1200,7 @@ vm_iommu_domain(struct vm *vm)
 }
 
 int
-vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state)
+vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
 {
         int error;
         struct vcpu *vcpu;
@@ -880,20 +1211,7 @@ vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state)
         vcpu = &vm->vcpu[vcpuid];
 
         vcpu_lock(vcpu);
-
-        /*
-         * The following state transitions are allowed:
-         * IDLE -> RUNNING -> IDLE
-         * IDLE -> CANNOT_RUN -> IDLE
-         */
-        if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) ||
-            (vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) {
-                error = 0;
-                vcpu->state = state;
-        } else {
-                error = EBUSY;
-        }
-
+        error = vcpu_set_state_locked(vcpu, newstate);
         vcpu_unlock(vcpu);
 
         return (error);
@@ -979,16 +1297,7 @@ vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
         vcpu_lock(vcpu);
         hostcpu = vcpu->hostcpu;
         if (hostcpu == NOCPU) {
-                /*
-                 * If the vcpu is 'RUNNING' but without a valid 'hostcpu' then
-                 * the host thread must be sleeping waiting for an event to
-                 * kick the vcpu out of 'hlt'.
-                 *
-                 * XXX this is racy because the condition exists right before
-                 * and after calling VMRUN() in vm_run().  The wakeup() is
-                 * benign in this case.
-                 */
-                if (vcpu->state == VCPU_RUNNING)
+                if (vcpu->state == VCPU_SLEEPING)
                         wakeup_one(vcpu);
         } else {
                 if (vcpu->state != VCPU_RUNNING)
@@ -998,3 +1307,10 @@ vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
         }
         vcpu_unlock(vcpu);
 }
+
+struct vmspace *
+vm_get_vmspace(struct vm *vm)
+{
+
+        return (vm->vmspace);
+}