24 files changed, 1040 insertions, 369 deletions
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
index c2851a4..6fec469 100644
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -397,6 +397,30 @@ vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector)
 }
 
 int
+vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector)
+{
+	struct vm_lapic_irq vmirq;
+
+	bzero(&vmirq, sizeof(vmirq));
+	vmirq.cpuid = vcpu;
+	vmirq.vector = vector;
+
+	return (ioctl(ctx->fd, VM_LAPIC_LOCAL_IRQ, &vmirq));
+}
+
+int
+vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
+{
+	struct vm_lapic_msi vmmsi;
+
+	bzero(&vmmsi, sizeof(vmmsi));
+	vmmsi.addr = addr;
+	vmmsi.msg = msg;
+
+	return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
+}
+
+int
 vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
 {
 	struct vm_ioapic_irq ioapic_irq;
@@ -551,8 +575,8 @@ vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
 }
 
 int
-vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-	     int destcpu, int vector, int numvec)
+vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
+    uint64_t addr, uint64_t msg, int numvec)
 {
 	struct vm_pptdev_msi pptmsi;
 
@@ -561,16 +585,16 @@ vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
 	pptmsi.bus = bus;
 	pptmsi.slot = slot;
 	pptmsi.func = func;
-	pptmsi.destcpu = destcpu;
-	pptmsi.vector = vector;
+	pptmsi.msg = msg;
+	pptmsi.addr = addr;
 	pptmsi.numvec = numvec;
 
 	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
 }
 
 int	
-vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-	      int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
+vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
+    int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
 {
 	struct vm_pptdev_msix pptmsix;
 
diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
index 293c431..69762c7 100644
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -67,6 +67,8 @@ int	vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type,
 int	vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type,
 			 int vector, int error_code);
 int	vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector);
+int	vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector);
+int	vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg);
 int	vm_ioapic_assert_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_deassert_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_pulse_irq(struct vmctx *ctx, int irq);
@@ -81,10 +83,11 @@ int	vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
 int	vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
 int	vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
 			   vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
-int	vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-		     int dest, int vector, int numvec);
-int	vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-		      int idx, uint32_t msg, uint32_t vector_control, uint64_t addr);
+int	vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot,
+	    int func, uint64_t addr, uint64_t msg, int numvec);
+int	vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot,
+	    int func, int idx, uint64_t addr, uint64_t msg,
+	    uint32_t vector_control);
 
 /*
  * Return a pointer to the statistics buffer. Note that this is not MT-safe.
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index f7acb62..d6f1a5a 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -158,7 +158,7 @@ vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
 }
 
 void *vcpu_stats(struct vm *vm, int vcpu);
-void vm_interrupt_hostcpu(struct vm *vm, int vcpu);
+void vcpu_notify_event(struct vm *vm, int vcpuid);
 struct vmspace *vm_get_vmspace(struct vm *vm);
 int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
 int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
@@ -266,6 +266,7 @@ enum vm_exitcode {
 	VM_EXITCODE_PAGING,
 	VM_EXITCODE_INST_EMUL,
 	VM_EXITCODE_SPINUP_AP,
+	VM_EXITCODE_SPINDOWN_CPU,
 	VM_EXITCODE_MAX
 };
 
@@ -310,6 +311,9 @@ struct vm_exit {
 			int		vcpu;
 			uint64_t	rip;
 		} spinup_ap;
+		struct {
+			uint64_t	rflags;
+		} hlt;
 	} u;
 };
 
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
index 19a5b02..454c411 100644
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -66,6 +66,11 @@ struct vm_event {
 	int		error_code_valid;
 };
 
+struct vm_lapic_msi {
+	uint64_t	msg;
+	uint64_t	addr;
+};
+
 struct vm_lapic_irq {
 	int		cpuid;
 	int		vector;
@@ -103,8 +108,8 @@ struct vm_pptdev_msi {
 	int		slot;
 	int		func;
 	int		numvec;		/* 0 means disabled */
-	int		vector;
-	int		destcpu;
+	uint64_t	msg;
+	uint64_t	addr;
 };
 
 struct vm_pptdev_msix {
@@ -113,7 +118,7 @@ struct vm_pptdev_msix {
 	int		slot;
 	int		func;
 	int		idx;
-	uint32_t	msg;
+	uint64_t	msg;
 	uint32_t	vector_control;
 	uint64_t	addr;
 };
@@ -175,6 +180,8 @@ enum {
 	IOCNUM_IOAPIC_ASSERT_IRQ = 33,
 	IOCNUM_IOAPIC_DEASSERT_IRQ = 34,
 	IOCNUM_IOAPIC_PULSE_IRQ = 35,
+	IOCNUM_LAPIC_MSI = 36,
+	IOCNUM_LAPIC_LOCAL_IRQ = 37, 
 
 	/* PCI pass-thru */
 	IOCNUM_BIND_PPTDEV = 40,
@@ -211,6 +218,10 @@ enum {
 	_IOW('v', IOCNUM_INJECT_EVENT, struct vm_event)
 #define	VM_LAPIC_IRQ 		\
 	_IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq)
+#define	VM_LAPIC_LOCAL_IRQ 	\
+	_IOW('v', IOCNUM_LAPIC_LOCAL_IRQ, struct vm_lapic_irq)
+#define	VM_LAPIC_MSI		\
+	_IOW('v', IOCNUM_LAPIC_MSI, struct vm_lapic_msi)
 #define	VM_IOAPIC_ASSERT_IRQ	\
 	_IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq)
 #define	VM_IOAPIC_DEASSERT_IRQ	\
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 9071f3e..10e83ea 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -1359,7 +1359,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 	struct vmcs *vmcs;
 	struct vmxctx *vmxctx;
 	uint32_t eax, ecx, edx, idtvec_info, idtvec_err, reason;
-	uint64_t qual, gpa;
+	uint64_t qual, gpa, rflags;
+	bool retu;
 
 	handled = 0;
 	vmcs = &vmx->vmcs[vcpu];
@@ -1405,31 +1406,46 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 		break;
 	case EXIT_REASON_RDMSR:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1);
+		retu = false;
 		ecx = vmxctx->guest_rcx;
-		error = emulate_rdmsr(vmx->vm, vcpu, ecx);
+		error = emulate_rdmsr(vmx->vm, vcpu, ecx, &retu);
 		if (error) {
 			vmexit->exitcode = VM_EXITCODE_RDMSR;
 			vmexit->u.msr.code = ecx;
-		} else
+		} else if (!retu) {
 			handled = 1;
+		} else {
+			/* Return to userspace with a valid exitcode */
+			KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
+			    ("emulate_rdmsr retu with bogus exitcode"));
+		}
 		break;
 	case EXIT_REASON_WRMSR:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_WRMSR, 1);
+		retu = false;
 		eax = vmxctx->guest_rax;
 		ecx = vmxctx->guest_rcx;
 		edx = vmxctx->guest_rdx;
 		error = emulate_wrmsr(vmx->vm, vcpu, ecx,
-					(uint64_t)edx << 32 | eax);
+		    (uint64_t)edx << 32 | eax, &retu);
 		if (error) {
 			vmexit->exitcode = VM_EXITCODE_WRMSR;
 			vmexit->u.msr.code = ecx;
 			vmexit->u.msr.wval = (uint64_t)edx << 32 | eax;
-		} else
+		} else if (!retu) {
 			handled = 1;
+		} else {
+			/* Return to userspace with a valid exitcode */
+			KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
+			    ("emulate_wrmsr retu with bogus exitcode"));
+		}
 		break;
 	case EXIT_REASON_HLT:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1);
+		if ((error = vmread(VMCS_GUEST_RFLAGS, &rflags)) != 0)
+			panic("vmx_exit_process: vmread(rflags) %d", error);
 		vmexit->exitcode = VM_EXITCODE_HLT;
+		vmexit->u.hlt.rflags = rflags;
 		break;
 	case EXIT_REASON_MTF:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1);
@@ -1584,7 +1600,6 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap)
 		panic("vmx_run: error %d setting up pcpu defaults", error);
 
 	do {
-		lapic_timer_tick(vmx->vm, vcpu);
 		vmx_inject_interrupts(vmx, vcpu);
 		vmx_run_trace(vmx, vcpu);
 		rc = vmx_setjmp(vmxctx);
diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c
index fce4bbd..32d59a0 100644
--- a/sys/amd64/vmm/io/ppt.c
+++ b/sys/amd64/vmm/io/ppt.c
@@ -72,8 +72,8 @@ MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
 
 struct pptintr_arg {				/* pptintr(pptintr_arg) */
 	struct pptdev	*pptdev;
-	int		vec;
-	int 		vcpu;
+	uint64_t	addr;
+	uint64_t	msg_data;
 };
 
 static struct pptdev {
@@ -412,16 +412,14 @@ ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
 static int
 pptintr(void *arg)
 {
-	int vec;
 	struct pptdev *ppt;
 	struct pptintr_arg *pptarg;
 	
 	pptarg = arg;
 	ppt = pptarg->pptdev;
-	vec = pptarg->vec;
 
 	if (ppt->vm != NULL)
-		lapic_intr_edge(ppt->vm, pptarg->vcpu, vec);
+		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
 	else {
 		/*
 		 * XXX
@@ -441,15 +439,13 @@ pptintr(void *arg)
 
 int
 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
-	      int destcpu, int vector, int numvec)
+	      uint64_t addr, uint64_t msg, int numvec)
 {
 	int i, rid, flags;
 	int msi_count, startrid, error, tmp;
 	struct pptdev *ppt;
 
-	if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
-	    (vector < 0 || vector > 255) ||
-	    (numvec < 0 || numvec > MAX_MSIMSGS))
+	if (numvec < 0 || numvec > MAX_MSIMSGS)
 		return (EINVAL);
 
 	ppt = ppt_find(bus, slot, func);
@@ -513,8 +509,8 @@ ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
 			break;
 
 		ppt->msi.arg[i].pptdev = ppt;
-		ppt->msi.arg[i].vec = vector + i;
-		ppt->msi.arg[i].vcpu = destcpu;
+		ppt->msi.arg[i].addr = addr;
+		ppt->msi.arg[i].msg_data = msg + i;
 
 		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
 				       INTR_TYPE_NET | INTR_MPSAFE,
@@ -534,7 +530,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
 
 int
 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
-	       int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
+	       int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
 {
 	struct pptdev *ppt;
 	struct pci_devinfo *dinfo;
@@ -605,8 +601,8 @@ ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
 			return (ENXIO);
 	
 		ppt->msix.arg[idx].pptdev = ppt;
-		ppt->msix.arg[idx].vec = msg & 0xFF;
-		ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF;
+		ppt->msix.arg[idx].addr = addr;
+		ppt->msix.arg[idx].msg_data = msg;
 	
 		/* Setup the MSI-X interrupt */
 		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
diff --git a/sys/amd64/vmm/io/ppt.h b/sys/amd64/vmm/io/ppt.h
index 7670bc4..45ba323 100644
--- a/sys/amd64/vmm/io/ppt.h
+++ b/sys/amd64/vmm/io/ppt.h
@@ -33,9 +33,9 @@ int	ppt_unassign_all(struct vm *vm);
 int	ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
 		     vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
 int	ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
-		      int destcpu, int vector, int numvec);
+		      uint64_t addr, uint64_t msg, int numvec);
 int	ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
-		int idx, uint32_t msg, uint32_t vector_control, uint64_t addr);
+		int idx, uint64_t addr, uint64_t msg, uint32_t vector_control);
 int	ppt_num_devices(struct vm *vm);
 boolean_t ppt_is_mmio(struct vm *vm, vm_paddr_t gpa);
 
diff --git a/sys/amd64/vmm/io/vhpet.c b/sys/amd64/vmm/io/vhpet.c
index 112480ee..929b343 100644
--- a/sys/amd64/vmm/io/vhpet.c
+++ b/sys/amd64/vmm/io/vhpet.c
@@ -240,8 +240,7 @@ vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
 static void
 vhpet_timer_interrupt(struct vhpet *vhpet, int n)
 {
-	int apicid, vector, vcpuid, pin;
-	cpuset_t dmask;
+	int pin;
 
 	/* If interrupts are not enabled for this timer then just return. */
 	if (!vhpet_timer_interrupt_enabled(vhpet, n))
@@ -256,26 +255,8 @@ vhpet_timer_interrupt(struct vhpet *vhpet, int n)
 	}
 
 	if (vhpet_timer_msi_enabled(vhpet, n)) {
-		/*
-		 * XXX should have an API 'vlapic_deliver_msi(vm, addr, data)'
-		 * - assuming physical delivery mode
-		 * - no need to interpret contents of 'msireg' here
-		 */
-		vector = vhpet->timer[n].msireg & 0xff;
-		apicid = (vhpet->timer[n].msireg >> (32 + 12)) & 0xff;
-		if (apicid != 0xff) {
-			/* unicast */
-			vcpuid = vm_apicid2vcpuid(vhpet->vm, apicid);
-			lapic_intr_edge(vhpet->vm, vcpuid, vector);
-		} else {
-			/* broadcast */
-			dmask = vm_active_cpus(vhpet->vm);
-			while ((vcpuid = CPU_FFS(&dmask)) != 0) {
-				vcpuid--;
-				CPU_CLR(vcpuid, &dmask);
-				lapic_intr_edge(vhpet->vm, vcpuid, vector);
-			}
-		}
+		lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32,
+		    vhpet->timer[n].msireg & 0xffffffff);
 		return;
 	}	
 
diff --git a/sys/amd64/vmm/io/vioapic.c b/sys/amd64/vmm/io/vioapic.c
index 167e8ab..151065a 100644
--- a/sys/amd64/vmm/io/vioapic.c
+++ b/sys/amd64/vmm/io/vioapic.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 
 #include "vmm_ktr.h"
 #include "vmm_lapic.h"
+#include "vlapic.h"
 #include "vioapic.h"
 
 #define	IOREGSEL	0x00
@@ -91,25 +92,14 @@ pinstate_str(bool asserted)
 	else
 		return ("deasserted");
 }
-
-static const char *
-trigger_str(bool level)
-{
-
-	if (level)
-		return ("level");
-	else
-		return ("edge");
-}
 #endif
 
 static void
 vioapic_send_intr(struct vioapic *vioapic, int pin)
 {
-	int vector, apicid, vcpuid;
-	uint32_t low, high;
-	cpuset_t dmask;
-	bool level;
+	int vector, delmode;
+	uint32_t low, high, dest;
+	bool level, phys;
 
 	KASSERT(pin >= 0 && pin < REDIR_ENTRIES,
 	    ("vioapic_set_pinstate: invalid pin number %d", pin));
@@ -120,52 +110,20 @@ vioapic_send_intr(struct vioapic *vioapic, int pin)
 	low = vioapic->rtbl[pin].reg;
 	high = vioapic->rtbl[pin].reg >> 32;
 
-	/*
-	 * XXX We only deal with:
-	 * - physical destination
-	 * - fixed delivery mode
-	 */
-	if ((low & IOART_DESTMOD) != IOART_DESTPHY) {
-		VIOAPIC_CTR2(vioapic, "ioapic pin%d: unsupported dest mode "
-		    "0x%08x", pin, low);
-		return;
-	}
-
-	if ((low & IOART_DELMOD) != IOART_DELFIXED) {
-		VIOAPIC_CTR2(vioapic, "ioapic pin%d: unsupported delivery mode "
-		    "0x%08x", pin, low);
-		return;
-	}
-
 	if ((low & IOART_INTMASK) == IOART_INTMSET) {
 		VIOAPIC_CTR1(vioapic, "ioapic pin%d: masked", pin);
 		return;
 	}
 
+	phys = ((low & IOART_DESTMOD) == IOART_DESTPHY);
+	delmode = low & IOART_DELMOD;
 	level = low & IOART_TRGRLVL ? true : false;
 	if (level)
 		vioapic->rtbl[pin].reg |= IOART_REM_IRR;
 
 	vector = low & IOART_INTVEC;
-	apicid = high >> APIC_ID_SHIFT;
-	if (apicid != 0xff) {
-		/* unicast */
-		vcpuid = vm_apicid2vcpuid(vioapic->vm, apicid);
-		VIOAPIC_CTR4(vioapic, "ioapic pin%d: %s triggered intr "
-		    "vector %d on vcpuid %d", pin, trigger_str(level),
-		    vector, vcpuid);
-		lapic_set_intr(vioapic->vm, vcpuid, vector, level);
-	} else {
-		/* broadcast */
-		VIOAPIC_CTR3(vioapic, "ioapic pin%d: %s triggered intr "
-		    "vector %d on all vcpus", pin, trigger_str(level), vector);
-		dmask = vm_active_cpus(vioapic->vm);
-		while ((vcpuid = CPU_FFS(&dmask)) != 0) {
-			vcpuid--;
-			CPU_CLR(vcpuid, &dmask);
-			lapic_set_intr(vioapic->vm, vcpuid, vector, level);
-		}
-	}
+	dest = high >> APIC_ID_SHIFT;
+	vlapic_deliver_intr(vioapic->vm, level, dest, phys, delmode, vector);
 }
 
 static void
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
index 6e5b5ea..695040d 100644
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -30,8 +30,10 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
+#include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
+#include <sys/mutex.h>
 #include <sys/systm.h>
 #include <sys/smp.h>
 
@@ -53,6 +55,9 @@ __FBSDID("$FreeBSD$");
 #define	VLAPIC_CTR1(vlapic, format, p1)					\
 	VCPU_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1)
 
+#define	VLAPIC_CTR2(vlapic, format, p1, p2)				\
+	VCPU_CTR2((vlapic)->vm, (vlapic)->vcpuid, format, p1, p2)
+
 #define	VLAPIC_CTR_IRR(vlapic, msg)					\
 do {									\
 	uint32_t *irrptr = &(vlapic)->apic.irr0;			\
@@ -86,7 +91,7 @@ static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic");
 #define	PRIO(x)			((x) >> 4)
 
 #define VLAPIC_VERSION		(16)
-#define VLAPIC_MAXLVT_ENTRIES	(5)
+#define VLAPIC_MAXLVT_ENTRIES	(APIC_LVT_CMCI)
 
 #define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)
 
@@ -100,12 +105,16 @@ struct vlapic {
 	struct vm		*vm;
 	int			vcpuid;
 
-	struct LAPIC		 apic;
+	struct LAPIC		apic;
 
-	int			 esr_update;
+	uint32_t		esr_pending;
+	int			esr_firing;
 
-	int			 divisor;
-	int			 ccr_ticks;
+	struct callout	callout;	/* vlapic timer */
+	struct bintime	timer_fire_bt;	/* callout expiry time */
+	struct bintime	timer_freq_bt;	/* timer frequency */
+	struct bintime	timer_period_bt; /* timer period */
+	struct mtx	timer_mtx;
 
 	/*
 	 * The 'isrvec_stk' is a stack of vectors injected by the local apic.
@@ -120,8 +129,101 @@ struct vlapic {
 	enum boot_state		boot_state;
 };
 
+/*
+ * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
+ * vlapic_callout_handler() and vcpu accesses to the following registers:
+ * - initial count register aka icr_timer
+ * - current count register aka ccr_timer
+ * - divide config register aka dcr_timer
+ * - timer LVT register
+ *
+ * Note that the vlapic_callout_handler() does not write to any of these
+ * registers so they can be safely read from the vcpu context without locking.
+ */
+#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
+#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
+#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))
+
 #define VLAPIC_BUS_FREQ	tsc_freq
 
+static __inline uint32_t
+vlapic_get_id(struct vlapic *vlapic)
+{
+
+	if (x2apic(vlapic))
+		return (vlapic->vcpuid);
+	else
+		return (vlapic->vcpuid << 24);
+}
+
+static __inline uint32_t
+vlapic_get_ldr(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic;
+	int apicid;
+	uint32_t ldr;
+
+	lapic = &vlapic->apic;
+	if (x2apic(vlapic)) {
+		apicid = vlapic_get_id(vlapic);
+		ldr = 1 << (apicid & 0xf);
+		ldr |= (apicid & 0xffff0) << 12;
+		return (ldr);
+	} else
+		return (lapic->ldr);
+}
+
+static __inline uint32_t
+vlapic_get_dfr(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic;
+
+	lapic = &vlapic->apic;
+	if (x2apic(vlapic))
+		return (0);
+	else
+		return (lapic->dfr);
+}
+
+static void
+vlapic_set_dfr(struct vlapic *vlapic, uint32_t data)
+{
+	uint32_t dfr;
+	struct LAPIC *lapic;
+	
+	if (x2apic(vlapic)) {
+		VM_CTR1(vlapic->vm, "write to DFR in x2apic mode: %#x", data);
+		return;
+	}
+
+	lapic = &vlapic->apic;
+	dfr = (lapic->dfr & APIC_DFR_RESERVED) | (data & APIC_DFR_MODEL_MASK);
+	if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
+		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
+	else if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
+		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
+	else
+		VLAPIC_CTR1(vlapic, "vlapic DFR in Unknown Model %#x", dfr);
+
+	lapic->dfr = dfr;
+}
+
+static void
+vlapic_set_ldr(struct vlapic *vlapic, uint32_t data)
+{
+	struct LAPIC *lapic;
+
+	/* LDR is read-only in x2apic mode */
+	if (x2apic(vlapic)) {
+		VLAPIC_CTR1(vlapic, "write to LDR in x2apic mode: %#x", data);
+		return;
+	}
+
+	lapic = &vlapic->apic;
+	lapic->ldr = data & ~APIC_LDR_RESERVED;
+	VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
+}
+
 static int
 vlapic_timer_divisor(uint32_t dcr)
 {
@@ -167,48 +269,92 @@ vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
 }
 #endif
 
-static uint64_t
+static uint32_t
 vlapic_get_ccr(struct vlapic *vlapic)
 {
-	struct LAPIC    *lapic = &vlapic->apic;
-	return lapic->ccr_timer;
+	struct bintime bt_now, bt_rem;
+	struct LAPIC *lapic;
+	uint32_t ccr;
+	
+	ccr = 0;
+	lapic = &vlapic->apic;
+
+	VLAPIC_TIMER_LOCK(vlapic);
+	if (callout_active(&vlapic->callout)) {
+		/*
+		 * If the timer is scheduled to expire in the future then
+		 * compute the value of 'ccr' based on the remaining time.
+		 */
+		binuptime(&bt_now);
+		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
+			bt_rem = vlapic->timer_fire_bt;
+			bintime_sub(&bt_rem, &bt_now);
+			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
+			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
+		}
+	}
+	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
+	    "icr_timer is %#x", ccr, lapic->icr_timer));
+	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
+	    ccr, lapic->icr_timer);
+	VLAPIC_TIMER_UNLOCK(vlapic);
+	return (ccr);
 }
 
 static void
-vlapic_update_errors(struct vlapic *vlapic)
+vlapic_set_dcr(struct vlapic *vlapic, uint32_t dcr)
 {
-	struct LAPIC    *lapic = &vlapic->apic;
-	lapic->esr = 0; // XXX 
+	struct LAPIC *lapic;
+	int divisor;
+	
+	lapic = &vlapic->apic;
+	VLAPIC_TIMER_LOCK(vlapic);
+
+	lapic->dcr_timer = dcr;
+	divisor = vlapic_timer_divisor(dcr);
+	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", dcr, divisor);
+
+	/*
+	 * Update the timer frequency and the timer period.
+	 *
+	 * XXX changes to the frequency divider will not take effect until
+	 * the timer is reloaded.
+	 */
+	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
+	vlapic->timer_period_bt = vlapic->timer_freq_bt;
+	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
+
+	VLAPIC_TIMER_UNLOCK(vlapic);
 }
 
 static void
-vlapic_init_ipi(struct vlapic *vlapic)
+vlapic_update_errors(struct vlapic *vlapic)
 {
 	struct LAPIC    *lapic = &vlapic->apic;
-	lapic->version = VLAPIC_VERSION;
-	lapic->version |= (VLAPIC_MAXLVT_ENTRIES < MAXLVTSHIFT);
-	lapic->dfr = 0xffffffff;
-	lapic->svr = APIC_SVR_VECTOR;
-	vlapic_mask_lvts(&lapic->lvt_timer, VLAPIC_MAXLVT_ENTRIES+1);
+	lapic->esr = vlapic->esr_pending;
+	vlapic->esr_pending = 0;
 }
 
-static int
+static void
 vlapic_reset(struct vlapic *vlapic)
 {
-	struct LAPIC	*lapic = &vlapic->apic;
+	struct LAPIC *lapic;
+	
+	lapic = &vlapic->apic;
+	bzero(lapic, sizeof(struct LAPIC));
 
-	memset(lapic, 0, sizeof(*lapic));
-	lapic->apr = vlapic->vcpuid;
-	vlapic_init_ipi(vlapic);
-	vlapic->divisor = vlapic_timer_divisor(lapic->dcr_timer);
+	lapic->version = VLAPIC_VERSION;
+	lapic->version |= (VLAPIC_MAXLVT_ENTRIES << MAXLVTSHIFT);
+	lapic->dfr = 0xffffffff;
+	lapic->svr = APIC_SVR_VECTOR;
+	vlapic_mask_lvts(&lapic->lvt_timer, 6);
+	vlapic_mask_lvts(&lapic->lvt_cmci, 1);
+	vlapic_set_dcr(vlapic, 0);
 
 	if (vlapic->vcpuid == 0)
 		vlapic->boot_state = BS_RUNNING;	/* BSP */
 	else
 		vlapic->boot_state = BS_INIT;		/* AP */
-	
-	return 0;
-
 }
 
 void
@@ -221,6 +367,17 @@ vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
 	if (vector < 0 || vector >= 256)
 		panic("vlapic_set_intr_ready: invalid vector %d\n", vector);
 
+	if (!(lapic->svr & APIC_SVR_ENABLE)) {
+		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
+		    "interrupt %d", vector);
+		return;
+	}
+
+	if (vector < 16) {
+		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR);
+		return;
+	}
+		
 	idx = (vector / 32) * 4;
 	mask = 1 << (vector % 32);
 
@@ -241,39 +398,93 @@ vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
 	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
 }
 
-static void
-vlapic_start_timer(struct vlapic *vlapic, uint32_t elapsed)
+static __inline uint32_t *
+vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
 {
-	uint32_t icr_timer;
-
-	icr_timer = vlapic->apic.icr_timer;
+	struct LAPIC	*lapic = &vlapic->apic;
+	int 		 i;
 
-	vlapic->ccr_ticks = ticks;
-	if (elapsed < icr_timer)
-		vlapic->apic.ccr_timer = icr_timer - elapsed;
-	else {
-		/*
-		 * This can happen when the guest is trying to run its local
-		 * apic timer higher that the setting of 'hz' in the host.
-		 *
-		 * We deal with this by running the guest local apic timer
-		 * at the rate of the host's 'hz' setting.
-		 */
-		vlapic->apic.ccr_timer = 0;
+	switch (offset) {
+	case APIC_OFFSET_CMCI_LVT:
+		return (&lapic->lvt_cmci);
+	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
+		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
+		return ((&lapic->lvt_timer) + i);
+	default:
+		panic("vlapic_get_lvt: invalid LVT\n");
 	}
 }
 
-static __inline uint32_t *
+static __inline uint32_t
 vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
 {
-	struct LAPIC	*lapic = &vlapic->apic;
-	int 		 i;
 
-	if (offset < APIC_OFFSET_TIMER_LVT || offset > APIC_OFFSET_ERROR_LVT) {
-		panic("vlapic_get_lvt: invalid LVT\n");
+	return (*vlapic_get_lvtptr(vlapic, offset));
+}
+
+static void
+vlapic_set_lvt(struct vlapic *vlapic, uint32_t offset, uint32_t val)
+{
+	uint32_t *lvtptr, mask;
+	struct LAPIC *lapic;
+	
+	lapic = &vlapic->apic;
+	lvtptr = vlapic_get_lvtptr(vlapic, offset);	
+
+	if (offset == APIC_OFFSET_TIMER_LVT)
+		VLAPIC_TIMER_LOCK(vlapic);
+
+	if (!(lapic->svr & APIC_SVR_ENABLE))
+		val |= APIC_LVT_M;
+	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
+	switch (offset) {
+	case APIC_OFFSET_TIMER_LVT:
+		mask |= APIC_LVTT_TM;
+		break;
+	case APIC_OFFSET_ERROR_LVT:
+		break;
+	case APIC_OFFSET_LINT0_LVT:
+	case APIC_OFFSET_LINT1_LVT:
+		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
+		/* FALLTHROUGH */
+	default:
+		mask |= APIC_LVT_DM;
+		break;
 	}
-	i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
-	return ((&lapic->lvt_timer) + i);;
+	*lvtptr = val & mask;
+
+	if (offset == APIC_OFFSET_TIMER_LVT)
+		VLAPIC_TIMER_UNLOCK(vlapic);
+}
+
+static int
+vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt)
+{
+	uint32_t vec, mode;
+
+	if (lvt & APIC_LVT_M)
+		return (0);
+
+	vec = lvt & APIC_LVT_VECTOR;
+	mode = lvt & APIC_LVT_DM;
+
+	switch (mode) {
+	case APIC_LVT_DM_FIXED:
+		if (vec < 16) {
+			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
+			return (0);
+		}
+		vlapic_set_intr_ready(vlapic, vec, false);
+		vcpu_notify_event(vlapic->vm, vlapic->vcpuid);
+		break;
+	case APIC_LVT_DM_NMI:
+		vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
+		break;
+	default:
+		// Other modes ignored
+		return (0);
+	}
+	return (1);
 }
 
 #if 1
@@ -398,44 +609,314 @@ vlapic_process_eoi(struct vlapic *vlapic)
 }
 
 static __inline int
-vlapic_get_lvt_field(uint32_t *lvt, uint32_t mask)
+vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
 {
-	return (*lvt & mask);
+
+	return (lvt & mask);
 }
 
 static __inline int
 vlapic_periodic_timer(struct vlapic *vlapic)
 {
-	uint32_t *lvt;
+	uint32_t lvt;
 	
 	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
 
 	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
 }
 
+static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");
+
+void
+vlapic_set_error(struct vlapic *vlapic, uint32_t mask)
+{
+	uint32_t lvt;
+
+	vlapic->esr_pending |= mask;
+	if (vlapic->esr_firing)
+		return;
+	vlapic->esr_firing = 1;
+
+	// The error LVT always uses the fixed delivery mode.
+	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
+	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
+		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
+	}
+	vlapic->esr_firing = 0;
+}
+
 static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");
 
 static void
 vlapic_fire_timer(struct vlapic *vlapic)
 {
-	int vector;
-	uint32_t *lvt;
+	uint32_t lvt;
+
+	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));
 	
+	// The timer LVT always uses the fixed delivery mode.
 	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
-
-	if (!vlapic_get_lvt_field(lvt, APIC_LVTT_M)) {
+	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
 		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
-		vector = vlapic_get_lvt_field(lvt,APIC_LVTT_VECTOR);
-		vlapic_set_intr_ready(vlapic, vector, false);
+	}
+}
+
+static VMM_STAT(VLAPIC_INTR_CMC,
+    "corrected machine check interrupts generated by vlapic");
+
+void
+vlapic_fire_cmci(struct vlapic *vlapic)
+{
+	uint32_t lvt;
+
+	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
+	if (vlapic_fire_lvt(vlapic, lvt)) {
+		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
+	}
+}
+
+static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_ENTRIES + 1,
+    "lvts triggered");
+
+int
+vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
+{
+	uint32_t lvt;
+
+	switch (vector) {
+	case APIC_LVT_LINT0:
+		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT0_LVT);
+		break;
+	case APIC_LVT_LINT1:
+		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT1_LVT);
+		break;
+	case APIC_LVT_TIMER:
+		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
+		lvt |= APIC_LVT_DM_FIXED;
+		break;
+	case APIC_LVT_ERROR:
+		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
+		lvt |= APIC_LVT_DM_FIXED;
+		break;
+	case APIC_LVT_PMC:
+		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_PERF_LVT);
+		break;
+	case APIC_LVT_THERMAL:
+		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_THERM_LVT);
+		break;
+	case APIC_LVT_CMCI:
+		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
+		break;
+	default:
+		return (EINVAL);
+	}
+	if (vlapic_fire_lvt(vlapic, lvt)) {
+		vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
+		    LVTS_TRIGGERRED, vector, 1);
+	}
+	return (0);
+}
+
+static void
+vlapic_callout_handler(void *arg)
+{
+	struct vlapic *vlapic;
+	struct bintime bt, btnow;
+	sbintime_t rem_sbt;
+
+	vlapic = arg;
+
+	VLAPIC_TIMER_LOCK(vlapic);
+	if (callout_pending(&vlapic->callout))	/* callout was reset */
+		goto done;
+
+	if (!callout_active(&vlapic->callout))	/* callout was stopped */
+		goto done;
+
+	callout_deactivate(&vlapic->callout);
+
+	KASSERT(vlapic->apic.icr_timer != 0, ("vlapic timer is disabled"));
+
+	vlapic_fire_timer(vlapic);
+
+	if (vlapic_periodic_timer(vlapic)) {
+		binuptime(&btnow);
+		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
+		    ("vlapic callout at %#lx.%#lx, expected at %#lx.%#lx",
+		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
+		    vlapic->timer_fire_bt.frac));
+
+		/*
+		 * Compute the delta between when the timer was supposed to
+		 * fire and the present time.
+		 */
+		bt = btnow;
+		bintime_sub(&bt, &vlapic->timer_fire_bt);
+
+		rem_sbt = bttosbt(vlapic->timer_period_bt);
+		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
+			/*
+			 * Adjust the time until the next countdown downward
+			 * to account for the lost time.
+			 */
+			rem_sbt -= bttosbt(bt);
+		} else {
+			/*
+			 * If the delta is greater than the timer period then
+			 * just reset our time base instead of trying to catch
+			 * up.
+			 */
+			vlapic->timer_fire_bt = btnow;
+			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
+			    "usecs, period is %lu usecs - resetting time base",
+			    bttosbt(bt) / SBT_1US,
+			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
+		}
+
+		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
+		callout_reset_sbt(&vlapic->callout, rem_sbt, 0,
+		    vlapic_callout_handler, vlapic, 0);
+	}
+done:
+	VLAPIC_TIMER_UNLOCK(vlapic);
+}
+
+static void
+vlapic_set_icr_timer(struct vlapic *vlapic, uint32_t icr_timer)
+{
+	struct LAPIC *lapic;
+	sbintime_t sbt;
+
+	VLAPIC_TIMER_LOCK(vlapic);
+
+	lapic = &vlapic->apic;
+	lapic->icr_timer = icr_timer;
+
+	vlapic->timer_period_bt = vlapic->timer_freq_bt;
+	bintime_mul(&vlapic->timer_period_bt, icr_timer);
+
+	if (icr_timer != 0) {
+		binuptime(&vlapic->timer_fire_bt);
+		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
+
+		sbt = bttosbt(vlapic->timer_period_bt);
+		callout_reset_sbt(&vlapic->callout, sbt, 0,
+		    vlapic_callout_handler, vlapic, 0);
+	} else
+		callout_stop(&vlapic->callout);
+
+	VLAPIC_TIMER_UNLOCK(vlapic);
+}
+
+/*
+ * This function populates 'dmask' with the set of vcpus that match the
+ * addressing specified by the (dest, phys, lowprio) tuple.
+ * 
+ * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
+ * or xAPIC (8-bit) destination field.
+ */
+static void
+vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
+    bool lowprio, bool x2apic_dest)
+{
+	struct vlapic *vlapic;
+	uint32_t dfr, ldr, ldest, cluster;
+	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
+	cpuset_t amask;
+	int vcpuid;
+
+	if ((x2apic_dest && dest == 0xffffffff) ||
+	    (!x2apic_dest && dest == 0xff)) {
+		/*
+		 * Broadcast in both logical and physical modes.
+		 */
+		*dmask = vm_active_cpus(vm);
+		return;
+	}
+
+	if (phys) {
+		/*
+		 * Physical mode: 'dest' is an APIC ID; skip out-of-range ids.
+		 */
+		CPU_ZERO(dmask);
+		vcpuid = vm_apicid2vcpuid(vm, dest);
+		if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
+			CPU_SET(vcpuid, dmask);
+	} else {
+		/*
+		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
+		 * bitmask. This model is only available in the xAPIC mode.
+		 */
+		mda_flat_ldest = dest & 0xff;
+
+		/*
+		 * In the "Cluster Model" the MDA is used to identify a
+		 * specific cluster and a set of APICs in that cluster.
+		 */
+		if (x2apic_dest) {
+			mda_cluster_id = dest >> 16;
+			mda_cluster_ldest = dest & 0xffff;
+		} else {
+			mda_cluster_id = (dest >> 4) & 0xf;
+			mda_cluster_ldest = dest & 0xf;
+		}
+
+		/*
+		 * Logical mode: match each APIC that has a bit set
+		 * in its LDR that matches a bit in the ldest.
+		 */
+		CPU_ZERO(dmask);
+		amask = vm_active_cpus(vm);
+		while ((vcpuid = CPU_FFS(&amask)) != 0) {
+			vcpuid--;
+			CPU_CLR(vcpuid, &amask);
+
+			vlapic = vm_lapic(vm, vcpuid);
+			dfr = vlapic_get_dfr(vlapic);
+			ldr = vlapic_get_ldr(vlapic);
+
+			if ((dfr & APIC_DFR_MODEL_MASK) ==
+			    APIC_DFR_MODEL_FLAT) {
+				ldest = ldr >> 24;
+				mda_ldest = mda_flat_ldest;
+			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
+			    APIC_DFR_MODEL_CLUSTER) {
+				if (x2apic(vlapic)) {
+					cluster = ldr >> 16;
+					ldest = ldr & 0xffff;
+				} else {
+					cluster = ldr >> 28;
+					ldest = (ldr >> 24) & 0xf;
+				}
+				if (cluster != mda_cluster_id)
+					continue;
+				mda_ldest = mda_cluster_ldest;
+			} else {
+				/*
+				 * Guest has configured a bad logical
+				 * model for this vcpu - skip it.
+				 */
+				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
+				    "model %x - cannot deliver interrupt", dfr);
+				continue;
+			}
+
+			if ((mda_ldest & ldest) != 0) {
+				CPU_SET(vcpuid, dmask);
+				if (lowprio)
+					break;
+			}
+		}
 	}
 }
 
 static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");
 
 static int
-lapic_process_icr(struct vlapic *vlapic, uint64_t icrval)
+lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu)
 {
 	int i;
+	bool phys;
 	cpuset_t dmask;
 	uint32_t dest, vec, mode;
 	struct vlapic *vlapic2;
@@ -448,10 +929,17 @@ lapic_process_icr(struct vlapic *vlapic, uint64_t icrval)
 	vec = icrval & APIC_VECTOR_MASK;
 	mode = icrval & APIC_DELMODE_MASK;
 
+	if (mode == APIC_DELMODE_FIXED && vec < 16) {
+		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
+		return (0);
+	}
+	
 	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
 		switch (icrval & APIC_DEST_MASK) {
 		case APIC_DEST_DESTFLD:
-			CPU_SETOF(dest, &dmask);
+			phys = ((icrval & APIC_DESTMODE_LOG) == 0);
+			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
+			    x2apic(vlapic));
 			break;
 		case APIC_DEST_SELF:
 			CPU_SETOF(vlapic->vcpuid, &dmask);
@@ -508,17 +996,18 @@ lapic_process_icr(struct vlapic *vlapic, uint64_t icrval)
 			if (vlapic2->boot_state != BS_SIPI)
 				return (0);
 
-			vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
-			vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
-			vmexit->u.spinup_ap.vcpu = dest;
-			vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
-
 			/*
 			 * XXX this assumes that the startup IPI always succeeds
 			 */
 			vlapic2->boot_state = BS_RUNNING;
 			vm_activate_cpu(vlapic2->vm, dest);
 
+			*retu = true;
+			vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
+			vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
+			vmexit->u.spinup_ap.vcpu = dest;
+			vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
+
 			return (0);
 		}
 	}
@@ -555,7 +1044,6 @@ vlapic_pending_intr(struct vlapic *vlapic)
 				break;
 		}
 	}
-	VLAPIC_CTR0(vlapic, "no pending intr");
 	return (-1);
 }
 
@@ -593,8 +1081,39 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector)
 	vlapic_update_ppr(vlapic);
 }
 
+static void
+lapic_set_svr(struct vlapic *vlapic, uint32_t new)
+{
+	struct LAPIC *lapic;
+	uint32_t old, changed;
+
+	lapic = &vlapic->apic;
+	old = lapic->svr;
+	changed = old ^ new;
+	if ((changed & APIC_SVR_ENABLE) != 0) {
+		if ((new & APIC_SVR_ENABLE) == 0) {
+			/*
+			 * The apic is now disabled so stop the apic timer.
+			 */
+			VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
+			VLAPIC_TIMER_LOCK(vlapic);
+			callout_stop(&vlapic->callout);
+			VLAPIC_TIMER_UNLOCK(vlapic);
+		} else {
+			/*
+			 * The apic is now enabled so restart the apic timer
+			 * if it is configured in periodic mode.
+			 */
+			VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
+			if (vlapic_periodic_timer(vlapic))
+				vlapic_set_icr_timer(vlapic, lapic->icr_timer);
+		}
+	}
+	lapic->svr = new;
+}
+
 int
-vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data)
+vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu)
 {
 	struct LAPIC	*lapic = &vlapic->apic;
 	uint32_t	*reg;
@@ -602,17 +1121,14 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data)
 
 	if (offset > sizeof(*lapic)) {
 		*data = 0;
-		return 0;
+		goto done;
 	}
 	
 	offset &= ~3;
 	switch(offset)
 	{
 		case APIC_OFFSET_ID:
-			if (x2apic(vlapic))
-				*data = vlapic->vcpuid;
-			else
-				*data = vlapic->vcpuid << 24;
+			*data = vlapic_get_id(vlapic);
 			break;
 		case APIC_OFFSET_VER:
 			*data = lapic->version;
@@ -630,10 +1146,10 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data)
 			*data = lapic->eoi;
 			break;
 		case APIC_OFFSET_LDR:
-			*data = lapic->ldr;
+			*data = vlapic_get_ldr(vlapic);
 			break;
 		case APIC_OFFSET_DFR:
-			*data = lapic->dfr;
+			*data = vlapic_get_dfr(vlapic);
 			break;
 		case APIC_OFFSET_SVR:
 			*data = lapic->svr;
@@ -662,9 +1178,9 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data)
 		case APIC_OFFSET_ICR_HI: 
 			*data = lapic->icr_hi;
 			break;
+		case APIC_OFFSET_CMCI_LVT:
 		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
-			reg = vlapic_get_lvt(vlapic, offset);	
-			*data = *(reg);
+			*data = vlapic_get_lvt(vlapic, offset);	
 			break;
 		case APIC_OFFSET_ICR:
 			*data = lapic->icr_timer;
@@ -680,16 +1196,19 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data)
 			*data = 0;
 			break;
 	}
+done:
+	VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data);
 	return 0;
 }
 
 int
-vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data)
+vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu)
 {
 	struct LAPIC	*lapic = &vlapic->apic;
-	uint32_t	*reg;
 	int		retval;
 
+	VLAPIC_CTR2(vlapic, "vlapic write offset %#x, data %#lx", offset, data);
+
 	if (offset > sizeof(*lapic)) {
 		return 0;
 	}
@@ -708,18 +1227,20 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data)
 			vlapic_process_eoi(vlapic);
 			break;
 		case APIC_OFFSET_LDR:
+			vlapic_set_ldr(vlapic, data);
 			break;
 		case APIC_OFFSET_DFR:
+			vlapic_set_dfr(vlapic, data);
 			break;
 		case APIC_OFFSET_SVR:
-			lapic->svr = data;
+			lapic_set_svr(vlapic, data);
 			break;
 		case APIC_OFFSET_ICR_LOW: 
 			if (!x2apic(vlapic)) {
 				data &= 0xffffffff;
 				data |= (uint64_t)lapic->icr_hi << 32;
 			}
-			retval = lapic_process_icr(vlapic, data);
+			retval = lapic_process_icr(vlapic, data, retu);
 			break;
 		case APIC_OFFSET_ICR_HI:
 			if (!x2apic(vlapic)) {
@@ -727,22 +1248,16 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data)
 				lapic->icr_hi = data;
 			}
 			break;
+		case APIC_OFFSET_CMCI_LVT:
 		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
-			reg = vlapic_get_lvt(vlapic, offset);	
-			if (!(lapic->svr & APIC_SVR_ENABLE)) {
-				data |= APIC_LVT_M;
-			}
-			*reg = data;
-			// vlapic_dump_lvt(offset, reg);
+			vlapic_set_lvt(vlapic, offset, data);
 			break;
 		case APIC_OFFSET_ICR:
-			lapic->icr_timer = data;
-			vlapic_start_timer(vlapic, 0);
+			vlapic_set_icr_timer(vlapic, data);
 			break;
 
 		case APIC_OFFSET_DCR:
-			lapic->dcr_timer = data;
-			vlapic->divisor = vlapic_timer_divisor(data);
+			vlapic_set_dcr(vlapic, data);
 			break;
 
 		case APIC_OFFSET_ESR:
@@ -764,70 +1279,6 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data)
 	return (retval);
 }
 
-int
-vlapic_timer_tick(struct vlapic *vlapic)
-{
-	int curticks, delta, periodic, fired;
-	uint32_t ccr;
-	uint32_t decrement, leftover;
-
-restart:
-	curticks = ticks;
-	delta = curticks - vlapic->ccr_ticks;
-
-	/* Local APIC timer is disabled */
-	if (vlapic->apic.icr_timer == 0)
-		return (-1);
-
-	/* One-shot mode and timer has already counted down to zero */
-	periodic = vlapic_periodic_timer(vlapic);
-	if (!periodic && vlapic->apic.ccr_timer == 0)
-		return (-1);
-	/*
-	 * The 'curticks' and 'ccr_ticks' are out of sync by more than
-	 * 2^31 ticks. We deal with this by restarting the timer.
-	 */
-	if (delta < 0) {
-		vlapic_start_timer(vlapic, 0);
-		goto restart;
-	}
-
-	fired = 0;
-	decrement = (VLAPIC_BUS_FREQ / vlapic->divisor) / hz;
-
-	vlapic->ccr_ticks = curticks;
-	ccr = vlapic->apic.ccr_timer;
-
-	while (delta-- > 0) {
-		if (ccr > decrement) {
-			ccr -= decrement;
-			continue;
-		}
-
-		/* Trigger the local apic timer interrupt */
-		vlapic_fire_timer(vlapic);
-		if (periodic) {
-			leftover = decrement - ccr;
-			vlapic_start_timer(vlapic, leftover);
-			ccr = vlapic->apic.ccr_timer;
-		} else {
-			/*
-			 * One-shot timer has counted down to zero.
-			 */
-			ccr = 0;
-		}
-		fired = 1;
-		break;
-	}
-
-	vlapic->apic.ccr_timer = ccr;
-
-	if (!fired)
-		return ((ccr / decrement) + 1);
-	else
-		return (0);
-}
-
 struct vlapic *
 vlapic_init(struct vm *vm, int vcpuid)
 {
@@ -837,6 +1288,16 @@ vlapic_init(struct vm *vm, int vcpuid)
 	vlapic->vm = vm;
 	vlapic->vcpuid = vcpuid;
 
+	/*
+	 * If the vlapic is configured in x2apic mode then it will be
+	 * accessed in the critical section via the MSR emulation code.
+	 *
+	 * Therefore the timer mutex must be a spinlock because blockable
+	 * mutexes cannot be acquired in a critical section.
+	 */
+	mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
+	callout_init(&vlapic->callout, 1);
+
 	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
 
 	if (vcpuid == 0)
@@ -851,6 +1312,7 @@ void
 vlapic_cleanup(struct vlapic *vlapic)
 {
 
+	callout_drain(&vlapic->callout);
 	free(vlapic, M_VLAPIC);
 }
 
@@ -887,3 +1349,43 @@ vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
 	if (state == X2APIC_DISABLED)
 		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
 }
+
+void
+vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
+    int delmode, int vec)
+{
+	bool lowprio;
+	int vcpuid;
+	cpuset_t dmask;
+
+	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
+		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
+		return;
+	}
+	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
+
+	/*
+	 * We don't provide any virtual interrupt redirection hardware so
+	 * all interrupts originating from the ioapic or MSI specify the
+	 * 'dest' in the legacy xAPIC format.
+	 */
+	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);
+
+	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
+		vcpuid--;
+		CPU_CLR(vcpuid, &dmask);
+		lapic_set_intr(vm, vcpuid, vec, level);
+	}
+}
+
+bool
+vlapic_enabled(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic = &vlapic->apic;
+
+	if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
+	    (lapic->svr & APIC_SVR_ENABLE) != 0)
+		return (true);
+	else
+		return (false);
+}
diff --git a/sys/amd64/vmm/io/vlapic.h b/sys/amd64/vmm/io/vlapic.h
index 8ea65ee..98f377e 100644
--- a/sys/amd64/vmm/io/vlapic.h
+++ b/sys/amd64/vmm/io/vlapic.h
@@ -69,6 +69,7 @@ struct vm;
 #define APIC_OFFSET_IRR6 	0x260   // IRR  192-223                    	R
 #define APIC_OFFSET_IRR7 	0x270   // IRR  224-255                    	R
 #define APIC_OFFSET_ESR		0x280   // Error Status Register           	R
+#define APIC_OFFSET_CMCI_LVT 	0x2F0   // Local Vector Table (CMCI)      	R/W
 #define APIC_OFFSET_ICR_LOW 	0x300   // Interrupt Command Reg. (0-31)   	R/W
 #define APIC_OFFSET_ICR_HI 	0x310   // Interrupt Command Reg. (32-63)  	R/W
 #define APIC_OFFSET_TIMER_LVT 	0x320   // Local Vector Table (Timer)      	R/W
@@ -90,15 +91,22 @@ enum x2apic_state;
 
 struct vlapic *vlapic_init(struct vm *vm, int vcpuid);
 void vlapic_cleanup(struct vlapic *vlapic);
-int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data);
-int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data);
+int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data,
+    bool *retu);
+int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data,
+    bool *retu);
 int vlapic_pending_intr(struct vlapic *vlapic);
 void vlapic_intr_accepted(struct vlapic *vlapic, int vector);
 void vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level);
-int vlapic_timer_tick(struct vlapic *vlapic);
+void vlapic_set_error(struct vlapic *vlapic, uint32_t mask);
+void vlapic_fire_cmci(struct vlapic *vlapic);
+int vlapic_trigger_lvt(struct vlapic *vlapic, int vector);
 
 uint64_t vlapic_get_apicbase(struct vlapic *vlapic);
 void vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val);
 void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state s);
+bool vlapic_enabled(struct vlapic *vlapic);
 
+void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
+    int delmode, int vec);
 #endif	/* _VLAPIC_H_ */
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 8cbd679..f471218b 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/vm.h>
 #include <machine/pcb.h>
 #include <machine/smp.h>
+#include <x86/psl.h>
 #include <x86/apicreg.h>
 #include <machine/vmparam.h>
 
@@ -869,41 +870,44 @@ vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
  * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
  */
 static int
-vm_handle_hlt(struct vm *vm, int vcpuid, boolean_t *retu)
+vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
 {
+	struct vm_exit *vmexit;
 	struct vcpu *vcpu;
-	int sleepticks, t;
+	int t, timo;
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	vcpu_lock(vcpu);
 
 	/*
-	 * Figure out the number of host ticks until the next apic
-	 * timer interrupt in the guest.
-	 */
-	sleepticks = lapic_timer_tick(vm, vcpuid);
-
-	/*
-	 * If the guest local apic timer is disabled then sleep for
-	 * a long time but not forever.
-	 */
-	if (sleepticks < 0)
-		sleepticks = hz;
-
-	/*
 	 * Do a final check for pending NMI or interrupts before
 	 * really putting this thread to sleep.
 	 *
 	 * These interrupts could have happened any time after we
 	 * returned from VMRUN() and before we grabbed the vcpu lock.
 	 */
-	if (!vm_nmi_pending(vm, vcpuid) && lapic_pending_intr(vm, vcpuid) < 0) {
-		if (sleepticks <= 0)
-			panic("invalid sleepticks %d", sleepticks);
+	if (!vm_nmi_pending(vm, vcpuid) &&
+	    (intr_disabled || vlapic_pending_intr(vcpu->vlapic) < 0)) {
 		t = ticks;
 		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
-		msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks);
+		if (vlapic_enabled(vcpu->vlapic)) {
+			/*
+			 * XXX msleep_spin() is not interruptible so use the
+			 * 'timo' to put an upper bound on the sleep time.
+			 */
+			timo = hz;
+			msleep_spin(vcpu, &vcpu->mtx, "vmidle", timo);
+		} else {
+			/*
+			 * Spindown the vcpu if the apic is disabled and it
+			 * had entered the halted state.
+			 */
+			*retu = true;
+			vmexit = vm_exitinfo(vm, vcpuid);
+			vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
+			VCPU_CTR0(vm, vcpuid, "spinning down cpu");
+		}
 		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
 		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
 	}
@@ -913,7 +917,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, boolean_t *retu)
 }
 
 static int
-vm_handle_paging(struct vm *vm, int vcpuid, boolean_t *retu)
+vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
 {
 	int rv, ftype;
 	struct vm_map *map;
@@ -951,7 +955,7 @@ done:
 }
 
 static int
-vm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu)
+vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
 {
 	struct vie *vie;
 	struct vcpu *vcpu;
@@ -992,15 +996,12 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu)
 		mread = vhpet_mmio_read;
 		mwrite = vhpet_mmio_write;
 	} else {
-		*retu = TRUE;
+		*retu = true;
 		return (0);
 	}
 
-	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite, 0);
-
-	/* return to userland to spin up the AP */
-	if (error == 0 && vme->exitcode == VM_EXITCODE_SPINUP_AP)
-		*retu = TRUE;
+	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
+	    retu);
 
 	return (error);
 }
@@ -1013,7 +1014,7 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
 	struct pcb *pcb;
 	uint64_t tscval, rip;
 	struct vm_exit *vme;
-	boolean_t retu;
+	bool retu, intr_disabled;
 	pmap_t pmap;
 
 	vcpuid = vmrun->cpuid;
@@ -1053,10 +1054,11 @@ restart:
 	critical_exit();
 
 	if (error == 0) {
-		retu = FALSE;
+		retu = false;
 		switch (vme->exitcode) {
 		case VM_EXITCODE_HLT:
-			error = vm_handle_hlt(vm, vcpuid, &retu);
+			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
+			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
 			break;
 		case VM_EXITCODE_PAGING:
 			error = vm_handle_paging(vm, vcpuid, &retu);
@@ -1065,12 +1067,12 @@ restart:
 			error = vm_handle_inst_emul(vm, vcpuid, &retu);
 			break;
 		default:
-			retu = TRUE;	/* handled in userland */
+			retu = true;	/* handled in userland */
 			break;
 		}
 	}
 
-	if (error == 0 && retu == FALSE) {
+	if (error == 0 && retu == false) {
 		rip = vme->rip + vme->inst_length;
 		goto restart;
 	}
@@ -1109,7 +1111,7 @@ vm_inject_nmi(struct vm *vm, int vcpuid)
 	vcpu = &vm->vcpu[vcpuid];
 
 	vcpu->nmi_pending = 1;
-	vm_interrupt_hostcpu(vm, vcpuid);
+	vcpu_notify_event(vm, vcpuid);
 	return (0);
 }
 
@@ -1329,8 +1331,15 @@ vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
 	return (0);
 }
 
+/*
+ * This function is called to ensure that a vcpu "sees" a pending event
+ * as soon as possible:
+ * - If the vcpu thread is sleeping then it is woken up.
+ * - If the vcpu is running on a different host_cpu then an IPI will be directed
+ *   to the host_cpu to cause the vcpu to trap into the hypervisor.
+ */
 void
-vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
+vcpu_notify_event(struct vm *vm, int vcpuid)
 {
 	int hostcpu;
 	struct vcpu *vcpu;
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index f248f68..02847c2 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -152,6 +152,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	struct vm_run *vmrun;
 	struct vm_event *vmevent;
 	struct vm_lapic_irq *vmirq;
+	struct vm_lapic_msi *vmmsi;
 	struct vm_ioapic_irq *ioapic_irq;
 	struct vm_capability *vmcap;
 	struct vm_pptdev *pptdev;
@@ -254,7 +255,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 		pptmsi = (struct vm_pptdev_msi *)data;
 		error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
 				      pptmsi->bus, pptmsi->slot, pptmsi->func,
-				      pptmsi->destcpu, pptmsi->vector,
+				      pptmsi->addr, pptmsi->msg,
 				      pptmsi->numvec);
 		break;
 	case VM_PPTDEV_MSIX:
@@ -262,8 +263,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 		error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
 				       pptmsix->bus, pptmsix->slot, 
 				       pptmsix->func, pptmsix->idx,
-				       pptmsix->msg, pptmsix->vector_control,
-				       pptmsix->addr);
+				       pptmsix->addr, pptmsix->msg,
+				       pptmsix->vector_control);
 		break;
 	case VM_MAP_PPTDEV_MMIO:
 		pptmmio = (struct vm_pptdev_mmio *)data;
@@ -296,6 +297,15 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 		vmirq = (struct vm_lapic_irq *)data;
 		error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector);
 		break;
+	case VM_LAPIC_LOCAL_IRQ:
+		vmirq = (struct vm_lapic_irq *)data;
+		error = lapic_set_local_intr(sc->vm, vmirq->cpuid,
+		    vmirq->vector);
+		break;
+	case VM_LAPIC_MSI:
+		vmmsi = (struct vm_lapic_msi *)data;
+		error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
+		break;
 	case VM_IOAPIC_ASSERT_IRQ:
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c
index 465ce6c..8d915cd 100644
--- a/sys/amd64/vmm/vmm_lapic.c
+++ b/sys/amd64/vmm/vmm_lapic.c
@@ -38,9 +38,18 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/vmm.h>
 #include "vmm_ipi.h"
+#include "vmm_ktr.h"
 #include "vmm_lapic.h"
 #include "vlapic.h"
 
+/*
+ * Some MSI message definitions
+ */
+#define	MSI_X86_ADDR_MASK	0xfff00000
+#define	MSI_X86_ADDR_BASE	0xfee00000
+#define	MSI_X86_ADDR_RH		0x00000008	/* Redirection Hint */
+#define	MSI_X86_ADDR_LOG	0x00000004	/* Destination Mode */
+
 int
 lapic_pending_intr(struct vm *vm, int cpu)
 {
@@ -75,19 +84,74 @@ lapic_set_intr(struct vm *vm, int cpu, int vector, bool level)
 	vlapic = vm_lapic(vm, cpu);
 	vlapic_set_intr_ready(vlapic, vector, level);
 
-	vm_interrupt_hostcpu(vm, cpu);
+	vcpu_notify_event(vm, cpu);
 
 	return (0);
 }
 
 int
-lapic_timer_tick(struct vm *vm, int cpu)
+lapic_set_local_intr(struct vm *vm, int cpu, int vector)
 {
 	struct vlapic *vlapic;
+	cpuset_t dmask;
+	int error;
 
-	vlapic = vm_lapic(vm, cpu);
+	if (cpu < -1 || cpu >= VM_MAXCPU)
+		return (EINVAL);
+
+	if (cpu == -1)
+		dmask = vm_active_cpus(vm);
+	else
+		CPU_SETOF(cpu, &dmask);
+	error = 0;
+	while ((cpu = CPU_FFS(&dmask)) != 0) {
+		cpu--;
+		CPU_CLR(cpu, &dmask);
+		vlapic = vm_lapic(vm, cpu);
+		error = vlapic_trigger_lvt(vlapic, vector);
+		if (error)
+			break;
+	}
+
+	return (error);
+}
 
-	return (vlapic_timer_tick(vlapic));
+int
+lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg)
+{
+	int delmode, vec;
+	uint32_t dest;
+	bool phys;
+
+	VM_CTR2(vm, "lapic MSI addr: %#lx msg: %#lx", addr, msg);
+	/* Fail with EINVAL rather than -1, which the kernel reads as ERESTART. */
+	if ((addr & MSI_X86_ADDR_MASK) != MSI_X86_ADDR_BASE) {
+		VM_CTR1(vm, "lapic MSI invalid addr %#lx", addr);
+		return (EINVAL);
+	}
+
+	/*
+	 * Extract the x86-specific fields from the MSI addr/msg
+	 * params according to the Intel Arch spec, Vol3 Ch 10.
+	 *
+	 * The PCI specification does not support level triggered
+	 * MSI/MSI-X so ignore trigger level in 'msg'.
+	 *
+	 * The 'dest' is interpreted as a logical APIC ID if both
+	 * the Redirection Hint and Destination Mode are '1' and
+	 * physical otherwise.
+	 */
+	dest = (addr >> 12) & 0xff;
+	phys = ((addr & (MSI_X86_ADDR_RH | MSI_X86_ADDR_LOG)) !=
+	    (MSI_X86_ADDR_RH | MSI_X86_ADDR_LOG));
+	delmode = msg & APIC_DELMODE_MASK;
+	vec = msg & 0xff;
+
+	VM_CTR3(vm, "lapic MSI %s dest %#x, vec %d",
+	    phys ? "physical" : "logical", dest, vec);
+
+	vlapic_deliver_intr(vm, LAPIC_TRIG_EDGE, dest, phys, delmode, vec);
+	return (0);
 }
 
 static boolean_t
@@ -117,7 +181,7 @@ lapic_msr(u_int msr)
 }
 
 int
-lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval)
+lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval, bool *retu)
 {
 	int error;
 	u_int offset;
@@ -130,14 +194,14 @@ lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval)
 		error = 0;
 	} else {
 		offset = x2apic_msr_to_regoff(msr);
-		error = vlapic_read(vlapic, offset, rval);
+		error = vlapic_read(vlapic, offset, rval, retu);
 	}
 
 	return (error);
 }
 
 int
-lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val)
+lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val, bool *retu)
 {
 	int error;
 	u_int offset;
@@ -150,7 +214,7 @@ lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val)
 		error = 0;
 	} else {
 		offset = x2apic_msr_to_regoff(msr);
-		error = vlapic_write(vlapic, offset, val);
+		error = vlapic_write(vlapic, offset, val, retu);
 	}
 
 	return (error);
@@ -174,7 +238,7 @@ lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size,
 		return (EINVAL);
 
 	vlapic = vm_lapic(vm, cpu);
-	error = vlapic_write(vlapic, off, wval);
+	error = vlapic_write(vlapic, off, wval, arg);
 	return (error);
 }
 
@@ -196,6 +260,6 @@ lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size,
 		return (EINVAL);
 
 	vlapic = vm_lapic(vm, cpu);
-	error = vlapic_read(vlapic, off, rval);
+	error = vlapic_read(vlapic, off, rval, arg);
 	return (error);
 }
diff --git a/sys/amd64/vmm/vmm_lapic.h b/sys/amd64/vmm/vmm_lapic.h
index 1461185..c5c95aa 100644
--- a/sys/amd64/vmm/vmm_lapic.h
+++ b/sys/amd64/vmm/vmm_lapic.h
@@ -32,16 +32,16 @@
 struct vm;
 
 boolean_t lapic_msr(u_int num);
-int	lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval);
-int	lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval);
+int	lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval,
+	    bool *retu);
+int	lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval,
+	    bool *retu);
 
 int	lapic_mmio_read(void *vm, int cpu, uint64_t gpa,
 			uint64_t *rval, int size, void *arg);
 int	lapic_mmio_write(void *vm, int cpu, uint64_t gpa,
 			 uint64_t wval, int size, void *arg);
 
-int	lapic_timer_tick(struct vm *vm, int cpu);
-
 /*
  * Returns a vector between 32 and 255 if an interrupt is pending in the
  * IRR that can be delivered based on the current state of ISR and TPR.
@@ -84,4 +84,12 @@ lapic_intr_edge(struct vm *vm, int cpu, int vector)
 	return (lapic_set_intr(vm, cpu, vector, LAPIC_TRIG_EDGE));
 }
 
+/*
+ * Triggers the LAPIC local interrupt (LVT) 'vector' on 'cpu'.  'cpu' can
+ * be set to -1 to trigger the interrupt on all CPUs.
+ */
+int	lapic_set_local_intr(struct vm *vm, int cpu, int vector);
+
+int	lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg);
+
 #endif
diff --git a/sys/amd64/vmm/vmm_msr.c b/sys/amd64/vmm/vmm_msr.c
index 4011bb5..03e0071 100644
--- a/sys/amd64/vmm/vmm_msr.c
+++ b/sys/amd64/vmm/vmm_msr.c
@@ -154,13 +154,13 @@ msr_num_to_idx(u_int num)
 }
 
 int
-emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val)
+emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val, bool *retu)
 {
 	int idx;
 	uint64_t *guest_msrs;
 
 	if (lapic_msr(num))
-		return (lapic_wrmsr(vm, cpu, num, val));
+		return (lapic_wrmsr(vm, cpu, num, val, retu));
 
 	idx = msr_num_to_idx(num);
 	if (idx < 0 || invalid_msr(idx))
@@ -181,14 +181,14 @@ emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val)
 }
 
 int
-emulate_rdmsr(struct vm *vm, int cpu, u_int num)
+emulate_rdmsr(struct vm *vm, int cpu, u_int num, bool *retu)
 {
 	int error, idx;
 	uint32_t eax, edx;
 	uint64_t result, *guest_msrs;
 
 	if (lapic_msr(num)) {
-		error = lapic_rdmsr(vm, cpu, num, &result);
+		error = lapic_rdmsr(vm, cpu, num, &result, retu);
 		goto done;
 	}
 
diff --git a/sys/amd64/vmm/vmm_msr.h b/sys/amd64/vmm/vmm_msr.h
index 8a1fda3..e070037 100644
--- a/sys/amd64/vmm/vmm_msr.h
+++ b/sys/amd64/vmm/vmm_msr.h
@@ -33,8 +33,9 @@
 struct vm;
 
 void	vmm_msr_init(void);
-int	emulate_wrmsr(struct vm *vm, int vcpu, u_int msr, uint64_t val);
-int	emulate_rdmsr(struct vm *vm, int vcpu, u_int msr);
+int	emulate_wrmsr(struct vm *vm, int vcpu, u_int msr, uint64_t val,
+	    bool *retu);
+int	emulate_rdmsr(struct vm *vm, int vcpu, u_int msr, bool *retu);
 void	guest_msrs_init(struct vm *vm, int cpu);
 void	guest_msr_valid(int msr);
 void	restore_host_msrs(struct vm *vm, int cpu);
diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c
index 07cd49f..818e7f2 100644
--- a/usr.sbin/bhyve/acpi.c
+++ b/usr.sbin/bhyve/acpi.c
@@ -297,6 +297,16 @@ basl_fwrite_madt(FILE *fp)
 	EFPRINTF(fp, "\t\t\tTrigger Mode : 3\n");
 	EFPRINTF(fp, "\n");
 
+	/* Local APIC NMI is connected to LINT 1 on all CPUs */
+	EFPRINTF(fp, "[0001]\t\tSubtable Type : 04\n");
+	EFPRINTF(fp, "[0001]\t\tLength : 06\n");
+	EFPRINTF(fp, "[0001]\t\tProcessorId : FF\n");
+	EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0005\n");
+	EFPRINTF(fp, "\t\t\tPolarity : 1\n");
+	EFPRINTF(fp, "\t\t\tTrigger Mode : 1\n");
+	EFPRINTF(fp, "[0001]\t\tInterrupt : 01\n");
+	EFPRINTF(fp, "\n");
+
 	EFFLUSH(fp);
 
 	return (0);
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 745fbf7..6c4df9c 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mman.h>
 #include <sys/time.h>
 
+#include <machine/atomic.h>
 #include <machine/segments.h>
 
 #include <stdio.h>
@@ -86,8 +87,6 @@ static int pincpu = -1;
 static int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic;
 static int virtio_msix = 1;
 
-static int foundcpus;
-
 static int strictio;
 
 static int acpi;
@@ -211,8 +210,7 @@ fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip)
 		exit(1);
 	}
 
-	cpumask |= 1 << vcpu;
-	foundcpus++;
+	atomic_set_int(&cpumask, 1 << vcpu);
 
 	/*
 	 * Set up the vmexit struct to allow execution to start
@@ -230,6 +228,20 @@ fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip)
 }
 
 static int
+fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
+{
+
+	if ((cpumask & (1 << vcpu)) == 0) {
+		fprintf(stderr,
+		    "deletecpu: attempting to delete unknown cpu %d\n", vcpu);
+		exit(1);
+	}
+	/* Clear this vcpu's bit; return non-zero if no bits remain set. */
+	atomic_clear_int(&cpumask, 1 << vcpu);
+	return (cpumask == 0);
+}
+
+static int
 vmexit_catch_reset(void)
 {
         stats.io_reset++;
@@ -333,6 +345,17 @@ vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 }
 
 static int
+vmexit_spindown_cpu(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
+{
+	int lastcpu;
+
+	lastcpu = fbsdrun_deletecpu(ctx, *pvcpu);
+	if (!lastcpu)
+		pthread_exit(NULL);
+	return (vmexit_catch_reset());
+}
+
+static int
 vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
@@ -423,6 +446,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
 	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,
 	[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
 	[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
+	[VM_EXITCODE_SPINDOWN_CPU] = vmexit_spindown_cpu,
 };
 
 static void
diff --git a/usr.sbin/bhyve/mptbl.c b/usr.sbin/bhyve/mptbl.c
index 2b4ca84..ea332d4 100644
--- a/usr.sbin/bhyve/mptbl.c
+++ b/usr.sbin/bhyve/mptbl.c
@@ -72,6 +72,9 @@ __FBSDID("$FreeBSD$");
 
 #define MPEP_FEATURES           (0xBFEBFBFF) /* XXX Intel i7 */
 
+/* Number of local intr entries */
+#define	MPEII_NUM_LOCAL_IRQ	2
+
 /* Number of i/o intr entries */
 #define	MPEII_MAX_IRQ		24
 
@@ -141,6 +144,30 @@ mpt_build_proc_entries(proc_entry_ptr mpep, int ncpu)
 }
 
 static void
+mpt_build_localint_entries(int_entry_ptr mpie)
+{
+
+	/* Hardcode LINT0 as ExtINT on all CPUs. */
+	memset(mpie, 0, sizeof(*mpie));
+	mpie->type = MPCT_ENTRY_LOCAL_INT;
+	mpie->int_type = INTENTRY_TYPE_EXTINT;
+	mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM |
+	    INTENTRY_FLAGS_TRIGGER_CONFORM;
+	mpie->dst_apic_id = 0xff;
+	mpie->dst_apic_int = 0;
+	mpie++;
+
+	/* Hardcode LINT1 as NMI on all CPUs. */
+	memset(mpie, 0, sizeof(*mpie));
+	mpie->type = MPCT_ENTRY_LOCAL_INT;
+	mpie->int_type = INTENTRY_TYPE_NMI;
+	mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM |
+	    INTENTRY_FLAGS_TRIGGER_CONFORM;
+	mpie->dst_apic_id = 0xff;
+	mpie->dst_apic_int = 1;
+}
+
+static void
 mpt_build_bus_entries(bus_entry_ptr mpeb)
 {
 
@@ -284,6 +311,11 @@ mptable_build(struct vmctx *ctx, int ncpu)
 	curraddr += sizeof(*mpie) * MPEII_MAX_IRQ;
 	mpch->entry_count += MPEII_MAX_IRQ;
 
+	mpie = (int_entry_ptr)curraddr;
+	mpt_build_localint_entries(mpie);
+	curraddr += sizeof(*mpie) * MPEII_NUM_LOCAL_IRQ;
+	mpch->entry_count += MPEII_NUM_LOCAL_IRQ;
+
 	if (oem_tbl_start) {
 		mpch->oem_table_pointer = curraddr - startaddr + MPTABLE_BASE;
 		mpch->oem_table_size = oem_tbl_size;
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
index cdea967..5adb739 100644
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -853,19 +853,14 @@ msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
 		else
 			msgdata = pci_get_cfgdata16(pi, capoff + 8);
 
-		/*
-		 * XXX check delivery mode, destination mode etc
-		 */
 		mme = msgctrl & PCIM_MSICTRL_MME_MASK;
 		pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0;
 		if (pi->pi_msi.enabled) {
-			pi->pi_msi.cpu = (addrlo >> 12) & 0xff;
-			pi->pi_msi.vector = msgdata & 0xff;
-			pi->pi_msi.msgnum = 1 << (mme >> 4);
+			pi->pi_msi.addr = addrlo;
+			pi->pi_msi.msg_data = msgdata;
+			pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
 		} else {
-			pi->pi_msi.cpu = 0;
-			pi->pi_msi.vector = 0;
-			pi->pi_msi.msgnum = 0;
+			pi->pi_msi.maxmsgnum = 0;
 		}
 	}
 
@@ -1143,10 +1138,10 @@ pci_msi_enabled(struct pci_devinst *pi)
 }
 
 int
-pci_msi_msgnum(struct pci_devinst *pi)
+pci_msi_maxmsgnum(struct pci_devinst *pi)
 {
 	if (pi->pi_msi.enabled)
-		return (pi->pi_msi.msgnum);
+		return (pi->pi_msi.maxmsgnum);
 	else
 		return (0);
 }
@@ -1175,19 +1170,17 @@ pci_generate_msix(struct pci_devinst *pi, int index)
 	mte = &pi->pi_msix.table[index];
 	if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
 		/* XXX Set PBA bit if interrupt is disabled */
-		vm_lapic_irq(pi->pi_vmctx,
-			     (mte->addr >> 12) & 0xff, mte->msg_data & 0xff);
+		vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data);
 	}
 }
 
 void
-pci_generate_msi(struct pci_devinst *pi, int msg)
+pci_generate_msi(struct pci_devinst *pi, int index)
 {
 
-	if (pci_msi_enabled(pi) && msg < pci_msi_msgnum(pi)) {
-		vm_lapic_irq(pi->pi_vmctx,
-			     pi->pi_msi.cpu,
-			     pi->pi_msi.vector + msg);
+	if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
+		vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
+			     pi->pi_msi.msg_data + index);
 	}
 }
 
@@ -1595,10 +1588,10 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
 		 * Special magic value to generate an interrupt
 		 */
 		if (offset == 4 && size == 4 && pci_msi_enabled(pi))
-			pci_generate_msi(pi, value % pci_msi_msgnum(pi));
+			pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi));
 
 		if (value == 0xabcdef) {
-			for (i = 0; i < pci_msi_msgnum(pi); i++)
+			for (i = 0; i < pci_msi_maxmsgnum(pi); i++)
 				pci_generate_msi(pi, i);
 		}
 	}
diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h
index b97c5b1..002924d 100644
--- a/usr.sbin/bhyve/pci_emul.h
+++ b/usr.sbin/bhyve/pci_emul.h
@@ -112,10 +112,10 @@ struct pci_devinst {
 	int	  pi_bar_getsize;
 
 	struct {
-		int	enabled;
-		int	cpu;
-		int	vector;
-		int	msgnum;
+		int		enabled;
+		uint64_t	addr;
+		uint64_t	msg_data;
+		int		maxmsgnum;
 	} pi_msi;
 
 	struct {
diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c
index 43c542d..dab5ffc 100644
--- a/usr.sbin/bhyve/pci_passthru.c
+++ b/usr.sbin/bhyve/pci_passthru.c
@@ -345,12 +345,10 @@ msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc,
 		/* If the entry is masked, don't set it up */
 		if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 ||
 		    (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
-			error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus,
-					      sc->psc_sel.pc_dev, 
-					      sc->psc_sel.pc_func,
-					      index, entry->msg_data, 
-					      entry->vector_control,
-					      entry->addr);
+			error = vm_setup_pptdev_msix(ctx, vcpu,
+			    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, 
+			    sc->psc_sel.pc_func, index, entry->addr,
+			    entry->msg_data, entry->vector_control);
 		}
 	}
 }
@@ -652,11 +650,12 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
 	if (msicap_access(sc, coff)) {
 		msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);
 
-		error = vm_setup_msi(ctx, vcpu, sc->psc_sel.pc_bus,
-			sc->psc_sel.pc_dev, sc->psc_sel.pc_func, pi->pi_msi.cpu,
-			pi->pi_msi.vector, pi->pi_msi.msgnum);
+		error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus,
+			sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
+			pi->pi_msi.addr, pi->pi_msi.msg_data,
+			pi->pi_msi.maxmsgnum);
 		if (error != 0) {
-			printf("vm_setup_msi returned error %d\r\n", errno);
+			printf("vm_setup_pptdev_msi error %d\r\n", errno);
 			exit(1);
 		}
 		return (0);
@@ -667,15 +666,16 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
 		if (pi->pi_msix.enabled) {
 			msix_table_entries = pi->pi_msix.table_count;
 			for (i = 0; i < msix_table_entries; i++) {
-				error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus,
-						      sc->psc_sel.pc_dev, 
-						      sc->psc_sel.pc_func, i, 
-						      pi->pi_msix.table[i].msg_data,
-						      pi->pi_msix.table[i].vector_control,
-						      pi->pi_msix.table[i].addr);
+				error = vm_setup_pptdev_msix(ctx, vcpu,
+				    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, 
+				    sc->psc_sel.pc_func, i, 
+				    pi->pi_msix.table[i].addr,
+				    pi->pi_msix.table[i].msg_data,
+				    pi->pi_msix.table[i].vector_control);
 		
 				if (error) {
-					printf("vm_setup_msix returned error %d\r\n", errno);
+					printf("vm_setup_pptdev_msix error "
+					    "%d\r\n", errno);
 					exit(1);	
 				}
 			}
diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c
index 0e92204..c697492 100644
--- a/usr.sbin/bhyvectl/bhyvectl.c
+++ b/usr.sbin/bhyvectl/bhyvectl.c
@@ -189,12 +189,15 @@ usage(void)
 	"       [--set-mem=<memory in units of MB>]\n"
 	"       [--get-lowmem]\n"
 	"       [--get-highmem]\n"
-	"       [--get-gpa-pmap]\n",
+	"       [--get-gpa-pmap]\n"
+	"       [--assert-lapic-lvt=<pin>]\n"
+	"       [--inject-nmi]\n",
 	progname);
 	exit(1);
 }
 
 static int get_stats, getcap, setcap, capval, get_gpa_pmap;
+static int inject_nmi, assert_lapic_lvt;
 static const char *capname;
 static int create, destroy, get_lowmem, get_highmem;
 static uint64_t memsize;
@@ -379,6 +382,7 @@ enum {
 	CAPNAME,
 	UNASSIGN_PPTDEV,
 	GET_GPA_PMAP,
+	ASSERT_LAPIC_LVT,
 };
 
 int
@@ -431,6 +435,7 @@ main(int argc, char *argv[])
 		{ "unassign-pptdev", REQ_ARG,	0,	UNASSIGN_PPTDEV },
 		{ "setcap",	REQ_ARG,	0,	SET_CAP },
 		{ "get-gpa-pmap", REQ_ARG,	0,	GET_GPA_PMAP },
+		{ "assert-lapic-lvt", REQ_ARG,	0,	ASSERT_LAPIC_LVT },
 		{ "getcap",	NO_ARG,		&getcap,	1 },
 		{ "get-stats",	NO_ARG,		&get_stats,	1 },
 		{ "get-desc-ds",NO_ARG,		&get_desc_ds,	1 },
@@ -557,10 +562,12 @@ main(int argc, char *argv[])
 		{ "run",	NO_ARG,		&run,		1 },
 		{ "create",	NO_ARG,		&create,	1 },
 		{ "destroy",	NO_ARG,		&destroy,	1 },
+		{ "inject-nmi",	NO_ARG,		&inject_nmi,	1 },
 		{ NULL,		0,		NULL,		0 }
 	};
 
 	vcpu = 0;
+	assert_lapic_lvt = -1;
 	progname = basename(argv[0]);
 
 	while ((ch = getopt_long(argc, argv, "", opts, NULL)) != -1) {
@@ -682,6 +689,9 @@ main(int argc, char *argv[])
 			if (sscanf(optarg, "%d/%d/%d", &bus, &slot, &func) != 3)
 				usage();
 			break;
+		case ASSERT_LAPIC_LVT:
+			assert_lapic_lvt = atoi(optarg);
+			break;
 		default:
 			usage();
 		}
@@ -825,6 +835,14 @@ main(int argc, char *argv[])
 					  vmcs_entry_interruption_info);
 	}
 
+	if (!error && inject_nmi) {
+		error = vm_inject_nmi(ctx, vcpu);
+	}
+
+	if (!error && assert_lapic_lvt != -1) {
+		error = vm_lapic_local_irq(ctx, vcpu, assert_lapic_lvt);
+	}
+
 	if (!error && (get_lowmem || get_all)) {
 		gpa = 0;
 		error = vm_get_memory_seg(ctx, gpa, &len, &wired);