summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorneel <neel@FreeBSD.org>2012-10-12 18:32:44 +0000
committerneel <neel@FreeBSD.org>2012-10-12 18:32:44 +0000
commite3e8a520e280f32230da3ddfa4c5260fea0e15a1 (patch)
tree0bfaa9a4adf32c0b651f9959696d1a24545e8bd8
parent4829fce72f3af3b96bc018ac20f556ee33b7f44f (diff)
downloadFreeBSD-src-e3e8a520e280f32230da3ddfa4c5260fea0e15a1.zip
FreeBSD-src-e3e8a520e280f32230da3ddfa4c5260fea0e15a1.tar.gz
Provide per-vcpu locks instead of relying on a single big lock.
This also gets rid of all the witness.watch warnings related to calling malloc(M_WAITOK) while holding a mutex. Reviewed by: grehan
-rw-r--r--sys/amd64/include/vmm.h19
-rw-r--r--sys/amd64/vmm/intel/vmx.c4
-rw-r--r--sys/amd64/vmm/io/ppt.c2
-rw-r--r--sys/amd64/vmm/vmm.c91
-rw-r--r--sys/amd64/vmm/vmm_dev.c67
-rw-r--r--sys/amd64/vmm/vmm_ipi.c24
-rw-r--r--sys/amd64/vmm/vmm_ipi.h3
7 files changed, 134 insertions, 76 deletions
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index be22eec..4dfdd04 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -130,19 +130,24 @@ int vmm_is_pptdev(int bus, int slot, int func);
void *vm_iommu_domain(struct vm *vm);
-#define VCPU_STOPPED 0
-#define VCPU_RUNNING 1
-void vm_set_run_state(struct vm *vm, int vcpu, int running);
-int vm_get_run_state(struct vm *vm, int vcpu, int *hostcpu);
+enum vcpu_state {
+ VCPU_IDLE,
+ VCPU_RUNNING,
+ VCPU_CANNOT_RUN,
+};
-void *vcpu_stats(struct vm *vm, int vcpu);
+int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state);
+enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu);
static int __inline
-vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
+vcpu_is_running(struct vm *vm, int vcpu)
{
- return (vm_get_run_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
+ return (vcpu_get_state(vm, vcpu) == VCPU_RUNNING);
}
+void *vcpu_stats(struct vm *vm, int vcpu);
+void vm_interrupt_hostcpu(struct vm *vm, int vcpu);
+
#endif /* KERNEL */
#define VM_MAXCPU 8 /* maximum virtual cpus */
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 3fbe5a1..6a1dbed 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -1568,7 +1568,7 @@ vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
* vmcs_getreg will VMCLEAR the vmcs when it is done which will cause
* the subsequent vmlaunch/vmresume to fail.
*/
- if (vcpu_is_running(vmx->vm, vcpu, NULL))
+ if (vcpu_is_running(vmx->vm, vcpu))
panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu);
return (vmcs_getreg(&vmx->vmcs[vcpu], reg, retval));
@@ -1596,7 +1596,7 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
* vmcs_setreg will VMCLEAR the vmcs when it is done which will cause
* the subsequent vmlaunch/vmresume to fail.
*/
- if (vcpu_is_running(vmx->vm, vcpu, NULL))
+ if (vcpu_is_running(vmx->vm, vcpu))
panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu);
error = vmcs_setreg(&vmx->vmcs[vcpu], reg, val);
diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c
index d6fef9a..3044fc5 100644
--- a/sys/amd64/vmm/io/ppt.c
+++ b/sys/amd64/vmm/io/ppt.c
@@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$");
#include "iommu.h"
#include "ppt.h"
+/* XXX locking */
+
#define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0]))
#define MAX_MMIOSEGS (PCIR_MAX_BAR_0 + 1)
#define MAX_MSIMSGS 32
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 019b9a8..8d8f143 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vm.h>
#include <machine/pcb.h>
+#include <machine/smp.h>
#include <x86/apicreg.h>
#include <machine/vmm.h>
@@ -65,6 +66,8 @@ struct vlapic;
struct vcpu {
int flags;
+ enum vcpu_state state;
+ struct mtx mtx;
int pincpu; /* host cpuid this vcpu is bound to */
int hostcpu; /* host cpuid this vcpu last ran on */
uint64_t guest_msrs[VMM_MSR_NUM];
@@ -76,7 +79,6 @@ struct vcpu {
enum x2apic_state x2apic_state;
};
#define VCPU_F_PINNED 0x0001
-#define VCPU_F_RUNNING 0x0002
#define VCPU_PINCPU(vm, vcpuid) \
((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1)
@@ -89,6 +91,10 @@ do { \
vm->vcpu[vcpuid].pincpu = host_cpuid; \
} while(0)
+#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_DEF)
+#define vcpu_lock(v) mtx_lock(&((v)->mtx))
+#define vcpu_unlock(v) mtx_unlock(&((v)->mtx))
+
#define VM_MAX_MEMORY_SEGMENTS 2
struct vm {
@@ -162,7 +168,8 @@ vcpu_init(struct vm *vm, uint32_t vcpu_id)
vcpu = &vm->vcpu[vcpu_id];
- vcpu->hostcpu = -1;
+ vcpu_lock_init(vcpu);
+ vcpu->hostcpu = NOCPU;
vcpu->vcpuid = vcpu_id;
vcpu->vlapic = vlapic_init(vm, vcpu_id);
vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
@@ -667,11 +674,13 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
pcb = PCPU_GET(curpcb);
set_pcb_flags(pcb, PCB_FULL_IRET);
- vcpu->hostcpu = curcpu;
-
restore_guest_msrs(vm, vcpuid);
restore_guest_fpustate(vcpu);
+
+ vcpu->hostcpu = curcpu;
error = VMRUN(vm->cookie, vcpuid, vmrun->rip);
+ vcpu->hostcpu = NOCPU;
+
save_guest_fpustate(vcpu);
restore_host_msrs(vm, vcpuid);
@@ -787,9 +796,10 @@ vm_iommu_domain(struct vm *vm)
return (vm->iommu);
}
-void
-vm_set_run_state(struct vm *vm, int vcpuid, int state)
+int
+vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state)
{
+ int error;
struct vcpu *vcpu;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
@@ -797,43 +807,42 @@ vm_set_run_state(struct vm *vm, int vcpuid, int state)
vcpu = &vm->vcpu[vcpuid];
- if (state == VCPU_RUNNING) {
- if (vcpu->flags & VCPU_F_RUNNING) {
- panic("vm_set_run_state: %s[%d] is already running",
- vm_name(vm), vcpuid);
- }
- vcpu->flags |= VCPU_F_RUNNING;
+ vcpu_lock(vcpu);
+
+ /*
+ * The following state transitions are allowed:
+ * IDLE -> RUNNING -> IDLE
+ * IDLE -> CANNOT_RUN -> IDLE
+ */
+ if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) ||
+ (vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) {
+ error = 0;
+ vcpu->state = state;
} else {
- if ((vcpu->flags & VCPU_F_RUNNING) == 0) {
- panic("vm_set_run_state: %s[%d] is already stopped",
- vm_name(vm), vcpuid);
- }
- vcpu->flags &= ~VCPU_F_RUNNING;
+ error = EBUSY;
}
+
+ vcpu_unlock(vcpu);
+
+ return (error);
}
-int
-vm_get_run_state(struct vm *vm, int vcpuid, int *cpuptr)
+enum vcpu_state
+vcpu_get_state(struct vm *vm, int vcpuid)
{
- int retval, hostcpu;
struct vcpu *vcpu;
+ enum vcpu_state state;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
vcpu = &vm->vcpu[vcpuid];
- if (vcpu->flags & VCPU_F_RUNNING) {
- retval = VCPU_RUNNING;
- hostcpu = vcpu->hostcpu;
- } else {
- retval = VCPU_STOPPED;
- hostcpu = -1;
- }
- if (cpuptr)
- *cpuptr = hostcpu;
+ vcpu_lock(vcpu);
+ state = vcpu->state;
+ vcpu_unlock(vcpu);
- return (retval);
+ return (state);
}
void
@@ -884,3 +893,25 @@ vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
return (0);
}
+
+void
+vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
+{
+ int hostcpu;
+ struct vcpu *vcpu;
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ /*
+ * XXX racy but the worst case is that we'll send an unnecessary IPI
+ * to the 'hostcpu'.
+ *
+ * We cannot use vcpu_is_running() here because it acquires vcpu->mtx
+ * which is not allowed inside a critical section.
+ */
+ hostcpu = vcpu->hostcpu;
+ if (hostcpu == NOCPU || hostcpu == curcpu)
+ return;
+
+ ipi_cpu(hostcpu, vmm_ipinum);
+}
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index 1eba226..0150ebd 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -88,9 +88,6 @@ vmmdev_lookup(const char *name)
static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
-#ifdef notyet /* XXX kernel is not compiled with invariants */
- mtx_assert(&vmmdev_mtx, MA_OWNED);
-#endif
return (cdev->si_drv1);
}
@@ -141,7 +138,8 @@ static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct thread *td)
{
- int error, vcpu;
+ int error, vcpu, state_changed;
+ enum vcpu_state new_state;
struct vmmdev_softc *sc;
struct vm_memory_segment *seg;
struct vm_register *vmreg;
@@ -160,12 +158,12 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vm_stat_desc *statdesc;
struct vm_x2apic *x2apic;
- mtx_lock(&vmmdev_mtx);
sc = vmmdev_lookup2(cdev);
- if (sc == NULL) {
- mtx_unlock(&vmmdev_mtx);
+ if (sc == NULL)
return (ENXIO);
- }
+
+ vcpu = -1;
+ state_changed = 0;
/*
* Some VMM ioctls can operate only on vcpus that are not running.
@@ -181,6 +179,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
case VM_GET_CAPABILITY:
case VM_SET_CAPABILITY:
case VM_PPTDEV_MSI:
+ case VM_PPTDEV_MSIX:
case VM_SET_X2APIC_STATE:
/*
* XXX fragile, handle with care
@@ -192,11 +191,42 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
goto done;
}
- if (vcpu_is_running(sc->vm, vcpu, NULL)) {
- error = EBUSY;
+ if (cmd == VM_RUN)
+ new_state = VCPU_RUNNING;
+ else
+ new_state = VCPU_CANNOT_RUN;
+
+ error = vcpu_set_state(sc->vm, vcpu, new_state);
+ if (error)
+ goto done;
+
+ state_changed = 1;
+ break;
+
+ case VM_MAP_PPTDEV_MMIO:
+ case VM_BIND_PPTDEV:
+ case VM_UNBIND_PPTDEV:
+ case VM_MAP_MEMORY:
+ /*
+ * ioctls that operate on the entire virtual machine must
+ * prevent all vcpus from running.
+ */
+ error = 0;
+ for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
+ error = vcpu_set_state(sc->vm, vcpu, VCPU_CANNOT_RUN);
+ if (error)
+ break;
+ }
+
+ if (error) {
+ while (--vcpu >= 0)
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
goto done;
}
+
+ state_changed = 2;
break;
+
default:
break;
}
@@ -204,14 +234,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
switch(cmd) {
case VM_RUN:
vmrun = (struct vm_run *)data;
-
- vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_RUNNING);
- mtx_unlock(&vmmdev_mtx);
-
error = vm_run(sc->vm, vmrun);
-
- mtx_lock(&vmmdev_mtx);
- vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_STOPPED);
break;
case VM_STAT_DESC: {
const char *desc;
@@ -346,9 +369,15 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = ENOTTY;
break;
}
-done:
- mtx_unlock(&vmmdev_mtx);
+ if (state_changed == 1) {
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
+ } else if (state_changed == 2) {
+ for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE);
+ }
+
+done:
return (error);
}
diff --git a/sys/amd64/vmm/vmm_ipi.c b/sys/amd64/vmm/vmm_ipi.c
index 055f86f..643d326 100644
--- a/sys/amd64/vmm/vmm_ipi.c
+++ b/sys/amd64/vmm/vmm_ipi.c
@@ -38,7 +38,6 @@ __FBSDID("$FreeBSD$");
#include <machine/apicvar.h>
#include <machine/segments.h>
#include <machine/md_var.h>
-#include <machine/smp.h>
#include <machine/vmm.h>
#include "vmm_ipi.h"
@@ -48,7 +47,7 @@ extern inthand_t IDTVEC(rsvd), IDTVEC(justreturn);
/*
* The default is to use the IPI_AST to interrupt a vcpu.
*/
-static int ipinum = IPI_AST;
+int vmm_ipinum = IPI_AST;
CTASSERT(APIC_SPURIOUS_INT == 255);
@@ -73,31 +72,22 @@ vmm_ipi_init(void)
ip = &idt[idx];
func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
if (func == (uintptr_t)&IDTVEC(rsvd)) {
- ipinum = idx;
- setidt(ipinum, IDTVEC(justreturn), SDT_SYSIGT,
+ vmm_ipinum = idx;
+ setidt(vmm_ipinum, IDTVEC(justreturn), SDT_SYSIGT,
SEL_KPL, 0);
break;
}
}
- if (ipinum != IPI_AST && bootverbose) {
+ if (vmm_ipinum != IPI_AST && bootverbose) {
printf("vmm_ipi_init: installing ipi handler to interrupt "
- "vcpus at vector %d\n", ipinum);
+ "vcpus at vector %d\n", vmm_ipinum);
}
}
void
vmm_ipi_cleanup(void)
{
- if (ipinum != IPI_AST)
- setidt(ipinum, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
-}
-
-void
-vm_interrupt_hostcpu(struct vm *vm, int vcpu)
-{
- int hostcpu;
-
- if (vcpu_is_running(vm, vcpu, &hostcpu) && hostcpu != curcpu)
- ipi_cpu(hostcpu, ipinum);
+ if (vmm_ipinum != IPI_AST)
+ setidt(vmm_ipinum, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
}
diff --git a/sys/amd64/vmm/vmm_ipi.h b/sys/amd64/vmm/vmm_ipi.h
index 7ab94bf..91552e3 100644
--- a/sys/amd64/vmm/vmm_ipi.h
+++ b/sys/amd64/vmm/vmm_ipi.h
@@ -31,8 +31,9 @@
struct vm;
+extern int vmm_ipinum;
+
void vmm_ipi_init(void);
void vmm_ipi_cleanup(void);
-void vm_interrupt_hostcpu(struct vm *vm, int vcpu);
#endif
OpenPOWER on IntegriCloud