| author | neel <neel@FreeBSD.org> | 2012-10-12 18:32:44 +0000 |
|---|---|---|
| committer | neel <neel@FreeBSD.org> | 2012-10-12 18:32:44 +0000 |
| commit | e3e8a520e280f32230da3ddfa4c5260fea0e15a1 (patch) | |
| tree | 0bfaa9a4adf32c0b651f9959696d1a24545e8bd8 | |
| parent | 4829fce72f3af3b96bc018ac20f556ee33b7f44f (diff) | |
| download | FreeBSD-src-e3e8a520e280f32230da3ddfa4c5260fea0e15a1.zip, FreeBSD-src-e3e8a520e280f32230da3ddfa4c5260fea0e15a1.tar.gz | |
Provide per-vcpu locks instead of relying on a single big lock.
This also gets rid of all the witness.watch warnings related to calling
malloc(M_WAITOK) while holding a mutex.
Reviewed by: grehan
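
The core of the change is easiest to see in isolation. Below is a minimal sketch of the per-vcpu locking pattern this commit introduces, condensed from the sys/amd64/vmm/vmm.c hunks that follow. It is illustrative, not the commit's literal code: the struct is trimmed to its locking fields, and the helper (here named `vcpu_try_set_state`, a hypothetical name) drops the `(vm, vcpuid)` lookup that the real `vcpu_set_state()` performs.

```c
/*
 * Sketch of the per-vcpu locking scheme (simplified from the vmm.c
 * hunks below): each vcpu owns a mutex guarding an explicit state
 * field, replacing the old global lock and VCPU_F_RUNNING flag.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/lock.h>
#include <sys/mutex.h>

enum vcpu_state { VCPU_IDLE, VCPU_RUNNING, VCPU_CANNOT_RUN };

struct vcpu {
	struct mtx	mtx;	/* protects 'state' */
	enum vcpu_state	state;
};

static void
vcpu_state_init(struct vcpu *vcpu)
{
	mtx_init(&vcpu->mtx, "vcpu lock", 0, MTX_DEF);
	vcpu->state = VCPU_IDLE;
}

/*
 * Only IDLE -> RUNNING -> IDLE and IDLE -> CANNOT_RUN -> IDLE are
 * legal transitions, so a vcpu can only be claimed out of IDLE; the
 * loser of a race sees a non-IDLE state and gets EBUSY instead of
 * the old "already running" panic.
 */
static int
vcpu_try_set_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	mtx_lock(&vcpu->mtx);
	if ((vcpu->state == VCPU_IDLE && newstate != VCPU_IDLE) ||
	    (vcpu->state != VCPU_IDLE && newstate == VCPU_IDLE)) {
		vcpu->state = newstate;
		error = 0;
	} else
		error = EBUSY;
	mtx_unlock(&vcpu->mtx);

	return (error);
}
```

vmm_dev.c then builds on this primitive instead of vmmdev_mtx: VM_RUN claims a single vcpu as VCPU_RUNNING, ioctls that touch the whole VM claim every vcpu as VCPU_CANNOT_RUN, and both paths release the vcpus back to VCPU_IDLE before returning.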
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | sys/amd64/include/vmm.h | 19 |
| -rw-r--r-- | sys/amd64/vmm/intel/vmx.c | 4 |
| -rw-r--r-- | sys/amd64/vmm/io/ppt.c | 2 |
| -rw-r--r-- | sys/amd64/vmm/vmm.c | 91 |
| -rw-r--r-- | sys/amd64/vmm/vmm_dev.c | 67 |
| -rw-r--r-- | sys/amd64/vmm/vmm_ipi.c | 24 |
| -rw-r--r-- | sys/amd64/vmm/vmm_ipi.h | 3 |
7 files changed, 134 insertions, 76 deletions
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index be22eec..4dfdd04 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -130,19 +130,24 @@ int vmm_is_pptdev(int bus, int slot, int func);
 
 void *vm_iommu_domain(struct vm *vm);
 
-#define VCPU_STOPPED 0
-#define VCPU_RUNNING 1
-void vm_set_run_state(struct vm *vm, int vcpu, int running);
-int vm_get_run_state(struct vm *vm, int vcpu, int *hostcpu);
+enum vcpu_state {
+	VCPU_IDLE,
+	VCPU_RUNNING,
+	VCPU_CANNOT_RUN,
+};
 
-void *vcpu_stats(struct vm *vm, int vcpu);
+int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state);
+enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu);
 
 static int __inline
-vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
+vcpu_is_running(struct vm *vm, int vcpu)
 {
-	return (vm_get_run_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
+	return (vcpu_get_state(vm, vcpu) == VCPU_RUNNING);
 }
 
+void *vcpu_stats(struct vm *vm, int vcpu);
+void vm_interrupt_hostcpu(struct vm *vm, int vcpu);
+
 #endif	/* KERNEL */
 
 #define	VM_MAXCPU	8	/* maximum virtual cpus */
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 3fbe5a1..6a1dbed 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -1568,7 +1568,7 @@ vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
 	 * vmcs_getreg will VMCLEAR the vmcs when it is done which will cause
 	 * the subsequent vmlaunch/vmresume to fail.
 	 */
-	if (vcpu_is_running(vmx->vm, vcpu, NULL))
+	if (vcpu_is_running(vmx->vm, vcpu))
 		panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu);
 
 	return (vmcs_getreg(&vmx->vmcs[vcpu], reg, retval));
@@ -1596,7 +1596,7 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
 	 * vmcs_setreg will VMCLEAR the vmcs when it is done which will cause
 	 * the subsequent vmlaunch/vmresume to fail.
 	 */
-	if (vcpu_is_running(vmx->vm, vcpu, NULL))
+	if (vcpu_is_running(vmx->vm, vcpu))
 		panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu);
 
 	error = vmcs_setreg(&vmx->vmcs[vcpu], reg, val);
diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c
index d6fef9a..3044fc5 100644
--- a/sys/amd64/vmm/io/ppt.c
+++ b/sys/amd64/vmm/io/ppt.c
@@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$");
 #include "iommu.h"
 #include "ppt.h"
 
+/* XXX locking */
+
 #define	MAX_PPTDEVS	(sizeof(pptdevs) / sizeof(pptdevs[0]))
 #define	MAX_MMIOSEGS	(PCIR_MAX_BAR_0 + 1)
 #define	MAX_MSIMSGS	32
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 019b9a8..8d8f143 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/vm.h>
 #include <machine/pcb.h>
+#include <machine/smp.h>
 #include <x86/apicreg.h>
 
 #include <machine/vmm.h>
@@ -65,6 +66,8 @@ struct vlapic;
 
 struct vcpu {
 	int		flags;
+	enum vcpu_state	state;
+	struct mtx	mtx;
 	int		pincpu;		/* host cpuid this vcpu is bound to */
 	int		hostcpu;	/* host cpuid this vcpu last ran on */
 	uint64_t	guest_msrs[VMM_MSR_NUM];
@@ -76,7 +79,6 @@ struct vcpu {
 	enum x2apic_state x2apic_state;
 };
 #define	VCPU_F_PINNED	0x0001
-#define	VCPU_F_RUNNING	0x0002
 
 #define	VCPU_PINCPU(vm, vcpuid)	\
     ((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1)
@@ -89,6 +91,10 @@ do {								\
 	vm->vcpu[vcpuid].pincpu = host_cpuid;			\
 } while(0)
 
+#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_DEF)
+#define	vcpu_lock(v)		mtx_lock(&((v)->mtx))
+#define	vcpu_unlock(v)		mtx_unlock(&((v)->mtx))
+
 #define	VM_MAX_MEMORY_SEGMENTS	2
 
 struct vm {
@@ -162,7 +168,8 @@ vcpu_init(struct vm *vm, uint32_t vcpu_id)
 
 	vcpu = &vm->vcpu[vcpu_id];
 
-	vcpu->hostcpu = -1;
+	vcpu_lock_init(vcpu);
+	vcpu->hostcpu = NOCPU;
 	vcpu->vcpuid = vcpu_id;
 	vcpu->vlapic = vlapic_init(vm, vcpu_id);
 	vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
@@ -667,11 +674,13 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
 	pcb = PCPU_GET(curpcb);
 	set_pcb_flags(pcb, PCB_FULL_IRET);
 
-	vcpu->hostcpu = curcpu;
-
 	restore_guest_msrs(vm, vcpuid);
 	restore_guest_fpustate(vcpu);
+
+	vcpu->hostcpu = curcpu;
 	error = VMRUN(vm->cookie, vcpuid, vmrun->rip);
+	vcpu->hostcpu = NOCPU;
+
 	save_guest_fpustate(vcpu);
 	restore_host_msrs(vm, vcpuid);
 
@@ -787,9 +796,10 @@ vm_iommu_domain(struct vm *vm)
 	return (vm->iommu);
 }
 
-void
-vm_set_run_state(struct vm *vm, int vcpuid, int state)
+int
+vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state)
 {
+	int error;
 	struct vcpu *vcpu;
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
@@ -797,43 +807,42 @@ vm_set_run_state(struct vm *vm, int vcpuid, int state)
 
 	vcpu = &vm->vcpu[vcpuid];
 
-	if (state == VCPU_RUNNING) {
-		if (vcpu->flags & VCPU_F_RUNNING) {
-			panic("vm_set_run_state: %s[%d] is already running",
-			       vm_name(vm), vcpuid);
-		}
-		vcpu->flags |= VCPU_F_RUNNING;
+	vcpu_lock(vcpu);
+
+	/*
+	 * The following state transitions are allowed:
+	 * IDLE -> RUNNING -> IDLE
+	 * IDLE -> CANNOT_RUN -> IDLE
+	 */
+	if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) ||
+	    (vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) {
+		error = 0;
+		vcpu->state = state;
 	} else {
-		if ((vcpu->flags & VCPU_F_RUNNING) == 0) {
-			panic("vm_set_run_state: %s[%d] is already stopped",
-			       vm_name(vm), vcpuid);
-		}
-		vcpu->flags &= ~VCPU_F_RUNNING;
+		error = EBUSY;
 	}
+
+	vcpu_unlock(vcpu);
+
+	return (error);
 }
 
-int
-vm_get_run_state(struct vm *vm, int vcpuid, int *cpuptr)
+enum vcpu_state
+vcpu_get_state(struct vm *vm, int vcpuid)
 {
-	int retval, hostcpu;
 	struct vcpu *vcpu;
+	enum vcpu_state state;
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
 
 	vcpu = &vm->vcpu[vcpuid];
 
-	if (vcpu->flags & VCPU_F_RUNNING) {
-		retval = VCPU_RUNNING;
-		hostcpu = vcpu->hostcpu;
-	} else {
-		retval = VCPU_STOPPED;
-		hostcpu = -1;
-	}
-	if (cpuptr)
-		*cpuptr = hostcpu;
+	vcpu_lock(vcpu);
+	state = vcpu->state;
+	vcpu_unlock(vcpu);
 
-	return (retval);
+	return (state);
 }
 
 void
@@ -884,3 +893,25 @@ vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
 
 	return (0);
 }
+
+void
+vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
+{
+	int hostcpu;
+	struct vcpu *vcpu;
+
+	vcpu = &vm->vcpu[vcpuid];
+
+	/*
+	 * XXX racy but the worst case is that we'll send an unnecessary IPI
+	 * to the 'hostcpu'.
+	 *
+	 * We cannot use vcpu_is_running() here because it acquires vcpu->mtx
+	 * which is not allowed inside a critical section.
+	 */
+	hostcpu = vcpu->hostcpu;
+	if (hostcpu == NOCPU || hostcpu == curcpu)
+		return;
+
+	ipi_cpu(hostcpu, vmm_ipinum);
+}
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index 1eba226..0150ebd 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -88,9 +88,6 @@ vmmdev_lookup(const char *name)
 static struct vmmdev_softc *
 vmmdev_lookup2(struct cdev *cdev)
 {
-#ifdef notyet	/* XXX kernel is not compiled with invariants */
-	mtx_assert(&vmmdev_mtx, MA_OWNED);
-#endif
 	return (cdev->si_drv1);
 }
 
@@ -141,7 +138,8 @@ static int
 vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	     struct thread *td)
 {
-	int error, vcpu;
+	int error, vcpu, state_changed;
+	enum vcpu_state new_state;
 	struct vmmdev_softc *sc;
 	struct vm_memory_segment *seg;
 	struct vm_register *vmreg;
@@ -160,12 +158,12 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	struct vm_stat_desc *statdesc;
 	struct vm_x2apic *x2apic;
 
-	mtx_lock(&vmmdev_mtx);
 	sc = vmmdev_lookup2(cdev);
-	if (sc == NULL) {
-		mtx_unlock(&vmmdev_mtx);
+	if (sc == NULL)
 		return (ENXIO);
-	}
+
+	vcpu = -1;
+	state_changed = 0;
 
 	/*
 	 * Some VMM ioctls can operate only on vcpus that are not running.
@@ -181,6 +179,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	case VM_GET_CAPABILITY:
 	case VM_SET_CAPABILITY:
 	case VM_PPTDEV_MSI:
+	case VM_PPTDEV_MSIX:
 	case VM_SET_X2APIC_STATE:
 		/*
 		 * XXX fragile, handle with care
@@ -192,11 +191,42 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 			goto done;
 		}
 
-		if (vcpu_is_running(sc->vm, vcpu, NULL)) {
-			error = EBUSY;
+		if (cmd == VM_RUN)
+			new_state = VCPU_RUNNING;
+		else
+			new_state = VCPU_CANNOT_RUN;
+
+		error = vcpu_set_state(sc->vm, vcpu, new_state);
+		if (error)
+			goto done;
+
+		state_changed = 1;
+		break;
+
+	case VM_MAP_PPTDEV_MMIO:
+	case VM_BIND_PPTDEV:
+	case VM_UNBIND_PPTDEV:
+	case VM_MAP_MEMORY:
+		/*
+		 * ioctls that operate on the entire virtual machine must
+		 * prevent all vcpus from running.
+		 */
+		error = 0;
+		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
+			error = vcpu_set_state(sc->vm, vcpu, VCPU_CANNOT_RUN);
+			if (error)
+				break;
+		}
+
+		if (error) {
+			while (--vcpu >= 0)
+				vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
 			goto done;
 		}
+
+		state_changed = 2;
 		break;
+
 	default:
 		break;
 	}
@@ -204,14 +234,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	switch(cmd) {
 	case VM_RUN:
 		vmrun = (struct vm_run *)data;
-
-		vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_RUNNING);
-		mtx_unlock(&vmmdev_mtx);
-
 		error = vm_run(sc->vm, vmrun);
-
-		mtx_lock(&vmmdev_mtx);
-		vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_STOPPED);
 		break;
 	case VM_STAT_DESC: {
 		const char *desc;
@@ -346,9 +369,15 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 		error = ENOTTY;
 		break;
 	}
-done:
-	mtx_unlock(&vmmdev_mtx);
 
+	if (state_changed == 1) {
+		vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
+	} else if (state_changed == 2) {
+		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
+			vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
+	}
+
+done:
 	return (error);
 }
 
diff --git a/sys/amd64/vmm/vmm_ipi.c b/sys/amd64/vmm/vmm_ipi.c
index 055f86f..643d326 100644
--- a/sys/amd64/vmm/vmm_ipi.c
+++ b/sys/amd64/vmm/vmm_ipi.c
@@ -38,7 +38,6 @@ __FBSDID("$FreeBSD$");
 #include <machine/apicvar.h>
 #include <machine/segments.h>
 #include <machine/md_var.h>
-#include <machine/smp.h>
 
 #include <machine/vmm.h>
 #include "vmm_ipi.h"
@@ -48,7 +47,7 @@ extern inthand_t IDTVEC(rsvd), IDTVEC(justreturn);
 
 /*
  * The default is to use the IPI_AST to interrupt a vcpu.
 */
-static int ipinum = IPI_AST;
+int vmm_ipinum = IPI_AST;
 
 CTASSERT(APIC_SPURIOUS_INT == 255);
@@ -73,31 +72,22 @@ vmm_ipi_init(void)
 		ip = &idt[idx];
 		func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
 		if (func == (uintptr_t)&IDTVEC(rsvd)) {
-			ipinum = idx;
-			setidt(ipinum, IDTVEC(justreturn), SDT_SYSIGT,
+			vmm_ipinum = idx;
+			setidt(vmm_ipinum, IDTVEC(justreturn), SDT_SYSIGT,
 			       SEL_KPL, 0);
 			break;
 		}
 	}
 
-	if (ipinum != IPI_AST && bootverbose) {
+	if (vmm_ipinum != IPI_AST && bootverbose) {
 		printf("vmm_ipi_init: installing ipi handler to interrupt "
-		       "vcpus at vector %d\n", ipinum);
+		       "vcpus at vector %d\n", vmm_ipinum);
 	}
 }
 
 void
 vmm_ipi_cleanup(void)
 {
-	if (ipinum != IPI_AST)
-		setidt(ipinum, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
-}
-
-void
-vm_interrupt_hostcpu(struct vm *vm, int vcpu)
-{
-	int hostcpu;
-
-	if (vcpu_is_running(vm, vcpu, &hostcpu) && hostcpu != curcpu)
-		ipi_cpu(hostcpu, ipinum);
+	if (vmm_ipinum != IPI_AST)
+		setidt(vmm_ipinum, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
 }
diff --git a/sys/amd64/vmm/vmm_ipi.h b/sys/amd64/vmm/vmm_ipi.h
index 7ab94bf..91552e3 100644
--- a/sys/amd64/vmm/vmm_ipi.h
+++ b/sys/amd64/vmm/vmm_ipi.h
@@ -31,8 +31,9 @@
 
 struct vm;
 
+extern int vmm_ipinum;
+
 void vmm_ipi_init(void);
 void vmm_ipi_cleanup(void);
-void vm_interrupt_hostcpu(struct vm *vm, int vcpu);
 
 #endif