From 3a9eeaa765cdfd3940dbf4b881957c057c6d6576 Mon Sep 17 00:00:00 2001 From: neel Date: Mon, 11 Feb 2013 20:36:07 +0000 Subject: Implement guest vcpu pinning using 'pthread_setaffinity_np(3)'. Prior to this change pinning was implemented via an ioctl (VM_SET_PINNING) that called 'sched_bind()' on behalf of the user thread. The ULE implementation of 'sched_bind()' bumps up 'td_pinned' which in turn runs afoul of the assertion '(td_pinned == 0)' in userret(). Using the cpuset affinity to implement pinning of the vcpu threads works with both 4BSD and ULE schedulers and has the happy side-effect of getting rid of a bunch of code in vmm.ko. Discussed with: grehan --- sys/amd64/include/vmm.h | 2 -- sys/amd64/include/vmm_dev.h | 11 --------- sys/amd64/vmm/io/ppt.c | 29 ++-------------------- sys/amd64/vmm/vmm.c | 59 --------------------------------------------- sys/amd64/vmm/vmm_dev.c | 12 --------- 5 files changed, 2 insertions(+), 111 deletions(-) (limited to 'sys/amd64') diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index ec94083..6bd3566 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -102,8 +102,6 @@ int vm_get_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *ret_desc); int vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc); -int vm_get_pinning(struct vm *vm, int vcpu, int *cpuid); -int vm_set_pinning(struct vm *vm, int vcpu, int cpuid); int vm_run(struct vm *vm, struct vm_run *vmrun); int vm_inject_event(struct vm *vm, int vcpu, int type, int vector, uint32_t error_code, int error_code_valid); diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index 2311673..0729927 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -51,11 +51,6 @@ struct vm_seg_desc { /* data or code segment */ struct seg_desc desc; }; -struct vm_pin { - int vm_cpuid; - int host_cpuid; /* -1 to unpin */ -}; - struct vm_run { int cpuid; uint64_t rip; /* start running here */ @@ -142,8 +137,6 @@ struct vm_x2apic { enum { IOCNUM_RUN, - IOCNUM_SET_PINNING, - IOCNUM_GET_PINNING, IOCNUM_MAP_MEMORY, IOCNUM_GET_MEMORY_SEG, IOCNUM_SET_REGISTER, @@ -168,10 +161,6 @@ enum { #define VM_RUN \ _IOWR('v', IOCNUM_RUN, struct vm_run) -#define VM_SET_PINNING \ - _IOW('v', IOCNUM_SET_PINNING, struct vm_pin) -#define VM_GET_PINNING \ - _IOWR('v', IOCNUM_GET_PINNING, struct vm_pin) #define VM_MAP_MEMORY \ _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment) #define VM_GET_MEMORY_SEG \ diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c index 4a05985..5aedaf2 100644 --- a/sys/amd64/vmm/io/ppt.c +++ b/sys/amd64/vmm/io/ppt.c @@ -402,31 +402,6 @@ pptintr(void *arg) return (FILTER_HANDLED); } -/* - * XXX - * When we try to free the MSI resource the kernel will bind the thread to - * the host cpu was originally handling the MSI. The function freeing the - * MSI vector (apic_free_vector()) will panic the kernel if the thread - * is already bound to a cpu. - * - * So, we temporarily unbind the vcpu thread before freeing the MSI resource. - */ -static void -PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt) -{ - int pincpu = -1; - - vm_get_pinning(vm, vcpu, &pincpu); - - if (pincpu >= 0) - vm_set_pinning(vm, vcpu, -1); - - ppt_teardown_msi(ppt); - - if (pincpu >= 0) - vm_set_pinning(vm, vcpu, pincpu); -} - int ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, int destcpu, int vector, int numvec) @@ -447,7 +422,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, return (EBUSY); /* Free any allocated resources */ - PPT_TEARDOWN_MSI(vm, vcpu, ppt); + ppt_teardown_msi(ppt); if (numvec == 0) /* nothing more to do */ return (0); @@ -513,7 +488,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, } if (i < numvec) { - PPT_TEARDOWN_MSI(vm, vcpu, ppt); + ppt_teardown_msi(ppt); return (ENXIO); } diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 82d4baa..85d277e 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -70,7 +70,6 @@ struct vcpu { int flags; enum vcpu_state state; struct mtx mtx; - int pincpu; /* host cpuid this vcpu is bound to */ int hostcpu; /* host cpuid this vcpu last ran on */ uint64_t guest_msrs[VMM_MSR_NUM]; struct vlapic *vlapic; @@ -81,18 +80,6 @@ struct vcpu { enum x2apic_state x2apic_state; int nmi_pending; }; -#define VCPU_F_PINNED 0x0001 - -#define VCPU_PINCPU(vm, vcpuid) \ - ((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1) - -#define VCPU_UNPIN(vm, vcpuid) (vm->vcpu[vcpuid].flags &= ~VCPU_F_PINNED) - -#define VCPU_PIN(vm, vcpuid, host_cpuid) \ -do { \ - vm->vcpu[vcpuid].flags |= VCPU_F_PINNED; \ - vm->vcpu[vcpuid].pincpu = host_cpuid; \ -} while(0) #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) @@ -594,52 +581,6 @@ vm_set_seg_desc(struct vm *vm, int vcpu, int reg, return (VMSETDESC(vm->cookie, vcpu, reg, desc)); } -int -vm_get_pinning(struct vm *vm, int vcpuid, int *cpuid) -{ - - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) - return (EINVAL); - - *cpuid = VCPU_PINCPU(vm, vcpuid); - - return (0); -} - -int -vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid) -{ - struct thread *td; - - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) - return (EINVAL); - - td = curthread; /* XXXSMP only safe when muxing vcpus */ - - /* unpin */ - if (host_cpuid < 0) { - VCPU_UNPIN(vm, vcpuid); - thread_lock(td); - sched_unbind(td); - thread_unlock(td); - return (0); - } - - if (CPU_ABSENT(host_cpuid)) - return (EINVAL); - - /* - * XXX we should check that 'host_cpuid' has not already been pinned - * by another vm. - */ - thread_lock(td); - sched_bind(td, host_cpuid); - thread_unlock(td); - VCPU_PIN(vm, vcpuid, host_cpuid); - - return (0); -} - static void restore_guest_fpustate(struct vcpu *vcpu) { diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index 0150ebd..95527ae 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -144,7 +144,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct vm_memory_segment *seg; struct vm_register *vmreg; struct vm_seg_desc* vmsegdesc; - struct vm_pin *vmpin; struct vm_run *vmrun; struct vm_event *vmevent; struct vm_lapic_irq *vmirq; @@ -170,7 +169,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, */ switch (cmd) { case VM_RUN: - case VM_SET_PINNING: case VM_GET_REGISTER: case VM_SET_REGISTER: case VM_GET_SEGMENT_DESCRIPTOR: @@ -301,16 +299,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, vmirq = (struct vm_lapic_irq *)data; error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector); break; - case VM_SET_PINNING: - vmpin = (struct vm_pin *)data; - error = vm_set_pinning(sc->vm, vmpin->vm_cpuid, - vmpin->host_cpuid); - break; - case VM_GET_PINNING: - vmpin = (struct vm_pin *)data; - error = vm_get_pinning(sc->vm, vmpin->vm_cpuid, - &vmpin->host_cpuid); - break; case VM_MAP_MEMORY: seg = (struct vm_memory_segment *)data; error = vm_malloc(sc->vm, seg->gpa, seg->len); -- cgit v1.1