author     neel <neel@FreeBSD.org>  2012-10-12 18:32:44 +0000
committer  neel <neel@FreeBSD.org>  2012-10-12 18:32:44 +0000
commit     e3e8a520e280f32230da3ddfa4c5260fea0e15a1 (patch)
tree       0bfaa9a4adf32c0b651f9959696d1a24545e8bd8 /sys/amd64
parent     4829fce72f3af3b96bc018ac20f556ee33b7f44f (diff)
Provide per-vcpu locks instead of relying on a single big lock.
This also gets rid of all the witness.watch warnings related to calling
malloc(M_WAITOK) while holding a mutex.

Reviewed by: grehan
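In outline, the change replaces serialization on the single vmmdev_mtx with a per-vcpu mutex guarding a small three-state machine (IDLE, RUNNING, CANNOT_RUN). A minimal sketch of the resulting caller pattern, using vcpu_set_state() from the diff below (example_ioctl_path() is an illustrative stand-in, not committed code):

static int
example_ioctl_path(struct vm *vm, int vcpuid)
{
	int error;

	/* Claim the vcpu; vcpu_set_state() fails with EBUSY unless it is IDLE. */
	error = vcpu_set_state(vm, vcpuid, VCPU_CANNOT_RUN);
	if (error)
		return (error);

	/*
	 * No mutex is held at this point, so a sleeping allocation
	 * (malloc with M_WAITOK) no longer trips witness.watch, which
	 * is what the old big-lock scheme could not avoid.
	 */

	/* Release the claim so the vcpu may run again. */
	vcpu_set_state(vm, vcpuid, VCPU_IDLE);
	return (0);
}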
Diffstat (limited to 'sys/amd64')
-rw-r--r--  sys/amd64/include/vmm.h     | 19
-rw-r--r--  sys/amd64/vmm/intel/vmx.c   |  4
-rw-r--r--  sys/amd64/vmm/io/ppt.c      |  2
-rw-r--r--  sys/amd64/vmm/vmm.c         | 91
-rw-r--r--  sys/amd64/vmm/vmm_dev.c     | 67
-rw-r--r--  sys/amd64/vmm/vmm_ipi.c     | 24
-rw-r--r--  sys/amd64/vmm/vmm_ipi.h     |  3
7 files changed, 134 insertions, 76 deletions
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index be22eec..4dfdd04 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -130,19 +130,24 @@ int vmm_is_pptdev(int bus, int slot, int func);
void *vm_iommu_domain(struct vm *vm);
-#define VCPU_STOPPED 0
-#define VCPU_RUNNING 1
-void vm_set_run_state(struct vm *vm, int vcpu, int running);
-int vm_get_run_state(struct vm *vm, int vcpu, int *hostcpu);
+enum vcpu_state {
+ VCPU_IDLE,
+ VCPU_RUNNING,
+ VCPU_CANNOT_RUN,
+};
-void *vcpu_stats(struct vm *vm, int vcpu);
+int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state);
+enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu);
static int __inline
-vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
+vcpu_is_running(struct vm *vm, int vcpu)
{
- return (vm_get_run_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
+ return (vcpu_get_state(vm, vcpu) == VCPU_RUNNING);
}
+void *vcpu_stats(struct vm *vm, int vcpu);
+void vm_interrupt_hostcpu(struct vm *vm, int vcpu);
+
#endif /* KERNEL */
#define VM_MAXCPU 8 /* maximum virtual cpus */
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 3fbe5a1..6a1dbed 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -1568,7 +1568,7 @@ vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
* vmcs_getreg will VMCLEAR the vmcs when it is done which will cause
* the subsequent vmlaunch/vmresume to fail.
*/
- if (vcpu_is_running(vmx->vm, vcpu, NULL))
+ if (vcpu_is_running(vmx->vm, vcpu))
panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu);
return (vmcs_getreg(&vmx->vmcs[vcpu], reg, retval));
@@ -1596,7 +1596,7 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
* vmcs_setreg will VMCLEAR the vmcs when it is done which will cause
* the subsequent vmlaunch/vmresume to fail.
*/
- if (vcpu_is_running(vmx->vm, vcpu, NULL))
+ if (vcpu_is_running(vmx->vm, vcpu))
panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu);
error = vmcs_setreg(&vmx->vmcs[vcpu], reg, val);
diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c
index d6fef9a..3044fc5 100644
--- a/sys/amd64/vmm/io/ppt.c
+++ b/sys/amd64/vmm/io/ppt.c
@@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$");
#include "iommu.h"
#include "ppt.h"
+/* XXX locking */
+
#define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0]))
#define MAX_MMIOSEGS (PCIR_MAX_BAR_0 + 1)
#define MAX_MSIMSGS 32
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 019b9a8..8d8f143 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vm.h>
#include <machine/pcb.h>
+#include <machine/smp.h>
#include <x86/apicreg.h>
#include <machine/vmm.h>
@@ -65,6 +66,8 @@ struct vlapic;
struct vcpu {
int flags;
+ enum vcpu_state state;
+ struct mtx mtx;
int pincpu; /* host cpuid this vcpu is bound to */
int hostcpu; /* host cpuid this vcpu last ran on */
uint64_t guest_msrs[VMM_MSR_NUM];
@@ -76,7 +79,6 @@ struct vcpu {
enum x2apic_state x2apic_state;
};
#define VCPU_F_PINNED 0x0001
-#define VCPU_F_RUNNING 0x0002
#define VCPU_PINCPU(vm, vcpuid) \
((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1)
@@ -89,6 +91,10 @@ do { \
vm->vcpu[vcpuid].pincpu = host_cpuid; \
} while(0)
+#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_DEF)
+#define vcpu_lock(v) mtx_lock(&((v)->mtx))
+#define vcpu_unlock(v) mtx_unlock(&((v)->mtx))
+
#define VM_MAX_MEMORY_SEGMENTS 2
struct vm {
@@ -162,7 +168,8 @@ vcpu_init(struct vm *vm, uint32_t vcpu_id)
vcpu = &vm->vcpu[vcpu_id];
- vcpu->hostcpu = -1;
+ vcpu_lock_init(vcpu);
+ vcpu->hostcpu = NOCPU;
vcpu->vcpuid = vcpu_id;
vcpu->vlapic = vlapic_init(vm, vcpu_id);
vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
@@ -667,11 +674,13 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
pcb = PCPU_GET(curpcb);
set_pcb_flags(pcb, PCB_FULL_IRET);
- vcpu->hostcpu = curcpu;
-
restore_guest_msrs(vm, vcpuid);
restore_guest_fpustate(vcpu);
+
+ vcpu->hostcpu = curcpu;
error = VMRUN(vm->cookie, vcpuid, vmrun->rip);
+ vcpu->hostcpu = NOCPU;
+
save_guest_fpustate(vcpu);
restore_host_msrs(vm, vcpuid);
@@ -787,9 +796,10 @@ vm_iommu_domain(struct vm *vm)
return (vm->iommu);
}
-void
-vm_set_run_state(struct vm *vm, int vcpuid, int state)
+int
+vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state)
{
+ int error;
struct vcpu *vcpu;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
@@ -797,43 +807,42 @@ vm_set_run_state(struct vm *vm, int vcpuid, int state)
vcpu = &vm->vcpu[vcpuid];
- if (state == VCPU_RUNNING) {
- if (vcpu->flags & VCPU_F_RUNNING) {
- panic("vm_set_run_state: %s[%d] is already running",
- vm_name(vm), vcpuid);
- }
- vcpu->flags |= VCPU_F_RUNNING;
+ vcpu_lock(vcpu);
+
+ /*
+ * The following state transitions are allowed:
+ * IDLE -> RUNNING -> IDLE
+ * IDLE -> CANNOT_RUN -> IDLE
+ */
+ if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) ||
+ (vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) {
+ error = 0;
+ vcpu->state = state;
} else {
- if ((vcpu->flags & VCPU_F_RUNNING) == 0) {
- panic("vm_set_run_state: %s[%d] is already stopped",
- vm_name(vm), vcpuid);
- }
- vcpu->flags &= ~VCPU_F_RUNNING;
+ error = EBUSY;
}
+
+ vcpu_unlock(vcpu);
+
+ return (error);
}
-int
-vm_get_run_state(struct vm *vm, int vcpuid, int *cpuptr)
+enum vcpu_state
+vcpu_get_state(struct vm *vm, int vcpuid)
{
- int retval, hostcpu;
struct vcpu *vcpu;
+ enum vcpu_state state;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
vcpu = &vm->vcpu[vcpuid];
- if (vcpu->flags & VCPU_F_RUNNING) {
- retval = VCPU_RUNNING;
- hostcpu = vcpu->hostcpu;
- } else {
- retval = VCPU_STOPPED;
- hostcpu = -1;
- }
- if (cpuptr)
- *cpuptr = hostcpu;
+ vcpu_lock(vcpu);
+ state = vcpu->state;
+ vcpu_unlock(vcpu);
- return (retval);
+ return (state);
}
void
@@ -884,3 +893,25 @@ vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
return (0);
}
+
+void
+vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
+{
+ int hostcpu;
+ struct vcpu *vcpu;
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ /*
+ * XXX racy but the worst case is that we'll send an unnecessary IPI
+ * to the 'hostcpu'.
+ *
+ * We cannot use vcpu_is_running() here because it acquires vcpu->mtx
+ * which is not allowed inside a critical section.
+ */
+ hostcpu = vcpu->hostcpu;
+ if (hostcpu == NOCPU || hostcpu == curcpu)
+ return;
+
+ ipi_cpu(hostcpu, vmm_ipinum);
+}
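The transition rules enforced by vcpu_set_state() above reduce to: a vcpu may leave IDLE for any state, and may only return to IDLE from a non-IDLE state. Written out as a worked sequence on a vcpu that starts out IDLE (illustrative, not committed code):

error = vcpu_set_state(vm, 0, VCPU_RUNNING);	/* IDLE -> RUNNING: 0 */
error = vcpu_set_state(vm, 0, VCPU_CANNOT_RUN);	/* RUNNING -> CANNOT_RUN: EBUSY */
error = vcpu_set_state(vm, 0, VCPU_IDLE);	/* RUNNING -> IDLE: 0 */
error = vcpu_set_state(vm, 0, VCPU_CANNOT_RUN);	/* IDLE -> CANNOT_RUN: 0 */
error = vcpu_set_state(vm, 0, VCPU_RUNNING);	/* CANNOT_RUN -> RUNNING: EBUSY */
error = vcpu_set_state(vm, 0, VCPU_IDLE);	/* CANNOT_RUN -> IDLE: 0 */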
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index 1eba226..0150ebd 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -88,9 +88,6 @@ vmmdev_lookup(const char *name)
static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
-#ifdef notyet /* XXX kernel is not compiled with invariants */
- mtx_assert(&vmmdev_mtx, MA_OWNED);
-#endif
return (cdev->si_drv1);
}
@@ -141,7 +138,8 @@ static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct thread *td)
{
- int error, vcpu;
+ int error, vcpu, state_changed;
+ enum vcpu_state new_state;
struct vmmdev_softc *sc;
struct vm_memory_segment *seg;
struct vm_register *vmreg;
@@ -160,12 +158,12 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vm_stat_desc *statdesc;
struct vm_x2apic *x2apic;
- mtx_lock(&vmmdev_mtx);
sc = vmmdev_lookup2(cdev);
- if (sc == NULL) {
- mtx_unlock(&vmmdev_mtx);
+ if (sc == NULL)
return (ENXIO);
- }
+
+ vcpu = -1;
+ state_changed = 0;
/*
* Some VMM ioctls can operate only on vcpus that are not running.
@@ -181,6 +179,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
case VM_GET_CAPABILITY:
case VM_SET_CAPABILITY:
case VM_PPTDEV_MSI:
+ case VM_PPTDEV_MSIX:
case VM_SET_X2APIC_STATE:
/*
* XXX fragile, handle with care
@@ -192,11 +191,42 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
goto done;
}
- if (vcpu_is_running(sc->vm, vcpu, NULL)) {
- error = EBUSY;
+ if (cmd == VM_RUN)
+ new_state = VCPU_RUNNING;
+ else
+ new_state = VCPU_CANNOT_RUN;
+
+ error = vcpu_set_state(sc->vm, vcpu, new_state);
+ if (error)
+ goto done;
+
+ state_changed = 1;
+ break;
+
+ case VM_MAP_PPTDEV_MMIO:
+ case VM_BIND_PPTDEV:
+ case VM_UNBIND_PPTDEV:
+ case VM_MAP_MEMORY:
+ /*
+ * ioctls that operate on the entire virtual machine must
+ * prevent all vcpus from running.
+ */
+ error = 0;
+ for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
+ error = vcpu_set_state(sc->vm, vcpu, VCPU_CANNOT_RUN);
+ if (error)
+ break;
+ }
+
+ if (error) {
+ while (--vcpu >= 0)
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
goto done;
}
+
+ state_changed = 2;
break;
+
default:
break;
}
@@ -204,14 +234,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
switch(cmd) {
case VM_RUN:
vmrun = (struct vm_run *)data;
-
- vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_RUNNING);
- mtx_unlock(&vmmdev_mtx);
-
error = vm_run(sc->vm, vmrun);
-
- mtx_lock(&vmmdev_mtx);
- vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_STOPPED);
break;
case VM_STAT_DESC: {
const char *desc;
@@ -346,9 +369,15 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = ENOTTY;
break;
}
-done:
- mtx_unlock(&vmmdev_mtx);
+ if (state_changed == 1) {
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
+ } else if (state_changed == 2) {
+ for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
+ }
+
+done:
return (error);
}
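The whole-VM ioctls above (VM_MAP_MEMORY and the PPTDEV bind/unbind/mmio calls) must park every vcpu before touching machine-wide state, and must unwind cleanly if any vcpu is busy. The same all-or-nothing idiom in isolation (freeze_all_vcpus() and unfreeze_all_vcpus() are illustrative names, not part of the commit):

static int
freeze_all_vcpus(struct vm *vm)
{
	int error, vcpu;

	for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
		error = vcpu_set_state(vm, vcpu, VCPU_CANNOT_RUN);
		if (error) {
			/* A vcpu was busy: release the ones already claimed. */
			while (--vcpu >= 0)
				vcpu_set_state(vm, vcpu, VCPU_IDLE);
			return (error);
		}
	}
	return (0);
}

static void
unfreeze_all_vcpus(struct vm *vm)
{
	int vcpu;

	for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
		vcpu_set_state(vm, vcpu, VCPU_IDLE);
}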
diff --git a/sys/amd64/vmm/vmm_ipi.c b/sys/amd64/vmm/vmm_ipi.c
index 055f86f..643d326 100644
--- a/sys/amd64/vmm/vmm_ipi.c
+++ b/sys/amd64/vmm/vmm_ipi.c
@@ -38,7 +38,6 @@ __FBSDID("$FreeBSD$");
#include <machine/apicvar.h>
#include <machine/segments.h>
#include <machine/md_var.h>
-#include <machine/smp.h>
#include <machine/vmm.h>
#include "vmm_ipi.h"
@@ -48,7 +47,7 @@ extern inthand_t IDTVEC(rsvd), IDTVEC(justreturn);
/*
* The default is to use the IPI_AST to interrupt a vcpu.
*/
-static int ipinum = IPI_AST;
+int vmm_ipinum = IPI_AST;
CTASSERT(APIC_SPURIOUS_INT == 255);
@@ -73,31 +72,22 @@ vmm_ipi_init(void)
ip = &idt[idx];
func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
if (func == (uintptr_t)&IDTVEC(rsvd)) {
- ipinum = idx;
- setidt(ipinum, IDTVEC(justreturn), SDT_SYSIGT,
+ vmm_ipinum = idx;
+ setidt(vmm_ipinum, IDTVEC(justreturn), SDT_SYSIGT,
SEL_KPL, 0);
break;
}
}
- if (ipinum != IPI_AST && bootverbose) {
+ if (vmm_ipinum != IPI_AST && bootverbose) {
printf("vmm_ipi_init: installing ipi handler to interrupt "
- "vcpus at vector %d\n", ipinum);
+ "vcpus at vector %d\n", vmm_ipinum);
}
}
void
vmm_ipi_cleanup(void)
{
- if (ipinum != IPI_AST)
- setidt(ipinum, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
-}
-
-void
-vm_interrupt_hostcpu(struct vm *vm, int vcpu)
-{
- int hostcpu;
-
- if (vcpu_is_running(vm, vcpu, &hostcpu) && hostcpu != curcpu)
- ipi_cpu(hostcpu, ipinum);
+ if (vmm_ipinum != IPI_AST)
+ setidt(vmm_ipinum, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
}
diff --git a/sys/amd64/vmm/vmm_ipi.h b/sys/amd64/vmm/vmm_ipi.h
index 7ab94bf..91552e3 100644
--- a/sys/amd64/vmm/vmm_ipi.h
+++ b/sys/amd64/vmm/vmm_ipi.h
@@ -31,8 +31,9 @@
struct vm;
+extern int vmm_ipinum;
+
void vmm_ipi_init(void);
void vmm_ipi_cleanup(void);
-void vm_interrupt_hostcpu(struct vm *vm, int vcpu);
#endif