From 3a9eeaa765cdfd3940dbf4b881957c057c6d6576 Mon Sep 17 00:00:00 2001
From: neel <neel@FreeBSD.org>
Date: Mon, 11 Feb 2013 20:36:07 +0000
Subject: Implement guest vcpu pinning using 'pthread_setaffinity_np(3)'.

Prior to this change, pinning was implemented via an ioctl (VM_SET_PINNING)
that called 'sched_bind()' on behalf of the user thread.

The ULE implementation of 'sched_bind()' bumps up 'td_pinned', which in turn
runs afoul of the assertion '(td_pinned == 0)' in userret().

Using the cpuset affinity to implement pinning of the vcpu threads works with
both 4BSD and ULE schedulers and has the happy side-effect of getting rid
of a bunch of code in vmm.ko.

Discussed with:	grehan
---
 sys/amd64/include/vmm.h     |  2 --
 sys/amd64/include/vmm_dev.h | 11 ---------
 sys/amd64/vmm/io/ppt.c      | 29 ++--------------------
 sys/amd64/vmm/vmm.c         | 59 ---------------------------------------------
 sys/amd64/vmm/vmm_dev.c     | 12 ---------
 5 files changed, 2 insertions(+), 111 deletions(-)

(limited to 'sys/amd64')

diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index ec94083..6bd3566 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -102,8 +102,6 @@ int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
 		    struct seg_desc *ret_desc);
 int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
 		    struct seg_desc *desc);
-int vm_get_pinning(struct vm *vm, int vcpu, int *cpuid);
-int vm_set_pinning(struct vm *vm, int vcpu, int cpuid);
 int vm_run(struct vm *vm, struct vm_run *vmrun);
 int vm_inject_event(struct vm *vm, int vcpu, int type,
 		    int vector, uint32_t error_code, int error_code_valid);
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
index 2311673..0729927 100644
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -51,11 +51,6 @@ struct vm_seg_desc {			/* data or code segment */
 	struct seg_desc desc;
 };
 
-struct vm_pin {
-	int		vm_cpuid;
-	int		host_cpuid;	/* -1 to unpin */
-};
-
 struct vm_run {
 	int		cpuid;
 	uint64_t	rip;		/* start running here */
@@ -142,8 +137,6 @@ struct vm_x2apic {
 
 enum {
 	IOCNUM_RUN,
-	IOCNUM_SET_PINNING,
-	IOCNUM_GET_PINNING,
 	IOCNUM_MAP_MEMORY,
 	IOCNUM_GET_MEMORY_SEG,
 	IOCNUM_SET_REGISTER,
@@ -168,10 +161,6 @@ enum {
 
 #define	VM_RUN		\
 	_IOWR('v', IOCNUM_RUN, struct vm_run)
-#define	VM_SET_PINNING	\
-	_IOW('v', IOCNUM_SET_PINNING, struct vm_pin)
-#define	VM_GET_PINNING	\
-	_IOWR('v', IOCNUM_GET_PINNING, struct vm_pin)
 #define	VM_MAP_MEMORY	\
 	_IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
 #define	VM_GET_MEMORY_SEG \
diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c
index 4a05985..5aedaf2 100644
--- a/sys/amd64/vmm/io/ppt.c
+++ b/sys/amd64/vmm/io/ppt.c
@@ -402,31 +402,6 @@ pptintr(void *arg)
 		return (FILTER_HANDLED);
 }
 
-/*
- * XXX
- * When we try to free the MSI resource the kernel will bind the thread to
- * the host cpu was originally handling the MSI. The function freeing the
- * MSI vector (apic_free_vector()) will panic the kernel if the thread
- * is already bound to a cpu.
- * 
- * So, we temporarily unbind the vcpu thread before freeing the MSI resource.
- */
-static void
-PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt)
-{
-	int pincpu = -1;
-
-	vm_get_pinning(vm, vcpu, &pincpu);
-
-	if (pincpu >= 0)
-		vm_set_pinning(vm, vcpu, -1);
-
-	ppt_teardown_msi(ppt);
-
-	if (pincpu >= 0)
-		vm_set_pinning(vm, vcpu, pincpu);
-}
-
 int
 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
 	      int destcpu, int vector, int numvec)
@@ -447,7 +422,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
 		return (EBUSY);
 
 	/* Free any allocated resources */
-	PPT_TEARDOWN_MSI(vm, vcpu, ppt);
+	ppt_teardown_msi(ppt);
 
 	if (numvec == 0)		/* nothing more to do */
 		return (0);
@@ -513,7 +488,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
 	}
 	
 	if (i < numvec) {
-		PPT_TEARDOWN_MSI(vm, vcpu, ppt);
+		ppt_teardown_msi(ppt);
 		return (ENXIO);
 	}
 
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 82d4baa..85d277e 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -70,7 +70,6 @@ struct vcpu {
 	int		flags;
 	enum vcpu_state	state;
 	struct mtx	mtx;
-	int		pincpu;		/* host cpuid this vcpu is bound to */
 	int		hostcpu;	/* host cpuid this vcpu last ran on */
 	uint64_t	guest_msrs[VMM_MSR_NUM];
 	struct vlapic	*vlapic;
@@ -81,18 +80,6 @@ struct vcpu {
 	enum x2apic_state x2apic_state;
 	int		nmi_pending;
 };
-#define	VCPU_F_PINNED	0x0001
-
-#define	VCPU_PINCPU(vm, vcpuid)	\
-    ((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1)
-
-#define	VCPU_UNPIN(vm, vcpuid)	(vm->vcpu[vcpuid].flags &= ~VCPU_F_PINNED)
-
-#define	VCPU_PIN(vm, vcpuid, host_cpuid)				\
-do {									\
-	vm->vcpu[vcpuid].flags |= VCPU_F_PINNED;			\
-	vm->vcpu[vcpuid].pincpu = host_cpuid;				\
-} while(0)
 
 #define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
 #define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
@@ -594,52 +581,6 @@ vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
 	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
 }
 
-int
-vm_get_pinning(struct vm *vm, int vcpuid, int *cpuid)
-{
-
-	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
-		return (EINVAL);
-
-	*cpuid = VCPU_PINCPU(vm, vcpuid);
-
-	return (0);
-}
-
-int
-vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid)
-{
-	struct thread *td;
-
-	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
-		return (EINVAL);
-
-	td = curthread;		/* XXXSMP only safe when muxing vcpus */
-
-	/* unpin */
-	if (host_cpuid < 0) {
-		VCPU_UNPIN(vm, vcpuid);
-		thread_lock(td);
-		sched_unbind(td);
-		thread_unlock(td);
-		return (0);
-	}
-
-	if (CPU_ABSENT(host_cpuid))
-		return (EINVAL);
-
-	/*
-	 * XXX we should check that 'host_cpuid' has not already been pinned
-	 * by another vm.
-	 */
-	thread_lock(td);
-	sched_bind(td, host_cpuid);
-	thread_unlock(td);
-	VCPU_PIN(vm, vcpuid, host_cpuid);
-
-	return (0);
-}
-
 static void
 restore_guest_fpustate(struct vcpu *vcpu)
 {
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index 0150ebd..95527ae 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -144,7 +144,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	struct vm_memory_segment *seg;
 	struct vm_register *vmreg;
 	struct vm_seg_desc* vmsegdesc;
-	struct vm_pin *vmpin;
 	struct vm_run *vmrun;
 	struct vm_event *vmevent;
 	struct vm_lapic_irq *vmirq;
@@ -170,7 +169,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	 */
 	switch (cmd) {
 	case VM_RUN:
-	case VM_SET_PINNING:
 	case VM_GET_REGISTER:
 	case VM_SET_REGISTER:
 	case VM_GET_SEGMENT_DESCRIPTOR:
@@ -301,16 +299,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 		vmirq = (struct vm_lapic_irq *)data;
 		error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector);
 		break;
-	case VM_SET_PINNING:
-		vmpin = (struct vm_pin *)data;
-		error = vm_set_pinning(sc->vm, vmpin->vm_cpuid,
-				       vmpin->host_cpuid);
-		break;
-	case VM_GET_PINNING:
-		vmpin = (struct vm_pin *)data;
-		error = vm_get_pinning(sc->vm, vmpin->vm_cpuid,
-				       &vmpin->host_cpuid);
-		break;
 	case VM_MAP_MEMORY:
 		seg = (struct vm_memory_segment *)data;
 		error = vm_malloc(sc->vm, seg->gpa, seg->len);
-- 
cgit v1.1