author | jhb <jhb@FreeBSD.org> | 2014-05-17 19:11:08 +0000
committer | jhb <jhb@FreeBSD.org> | 2014-05-17 19:11:08 +0000
commit | bbf655f9b49cc39db4559ede5c58d302ff8f3de2 (patch)
tree | f6cf26193250fdea84a6946390d9759716c70b5c /sys/amd64
parent | 7e7928763170f8b10771c099cf46224daaf67bca (diff)
download | FreeBSD-src-bbf655f9b49cc39db4559ede5c58d302ff8f3de2.zip FreeBSD-src-bbf655f9b49cc39db4559ede5c58d302ff8f3de2.tar.gz
MFC 259641,259863,259924,259937,259961,259978,260380,260383,260410,260466,
260531,260532,260550,260619,261170,261453,261621,263280,263290,264516:
Add support for local APIC hardware-assist.
- Restructure vlapic access and register handling to support hardware-assist
for the local APIC.
- Use the 'Virtual Interrupt Delivery' and 'Posted Interrupt Processing'
features of Intel VT-x if supported by hardware (a condensed sketch of the
interrupt-posting path follows the diffstat).
- Add an API to rendezvous all active vcpus in a virtual machine and use
it to support level-triggered interrupts with VT-x 'Virtual Interrupt
Delivery' (a usage sketch follows this message).
- Use a cheaper IPI handler than IPI_AST for nested page table shootdowns
and avoid doing unnecessary nested TLB invalidations.
Reviewed by: neel
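
Before the file-level changes, a minimal usage sketch of the new rendezvous API declared in sys/amd64/include/vmm.h by this commit. The callback and wrapper names here are hypothetical illustrations; the constraints (the callback must not sleep, the initiator must hold no locks, and 'vcpuid' is -1 when not initiated from a vcpu context) come from the API comment in the diff below, and the pattern mirrors the real caller added in vioapic.c.

    #include <sys/param.h>
    #include <sys/cpuset.h>
    #include <machine/vmm.h>

    /* Hypothetical per-vcpu callback; it is not allowed to sleep. */
    static void
    example_rendezvous_func(struct vm *vm, int vcpuid, void *arg)
    {
            /* Per-vcpu work, e.g. refreshing the vlapic TMR as vioapic.c does. */
    }

    /* Rendezvous every active vcpu; 'vcpuid' is the initiator, or -1. */
    static void
    example_rendezvous(struct vm *vm, int vcpuid)
    {
            cpuset_t allvcpus;

            allvcpus = vm_active_cpus(vm);
            vm_smp_rendezvous(vm, vcpuid, allvcpus, example_rendezvous_func,
                NULL);
    }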
Diffstat (limited to 'sys/amd64')
-rw-r--r-- | sys/amd64/amd64/pmap.c | 7
-rw-r--r-- | sys/amd64/include/pmap.h | 7
-rw-r--r-- | sys/amd64/include/vmm.h | 40
-rw-r--r-- | sys/amd64/vmm/amd/amdv.c | 20
-rw-r--r-- | sys/amd64/vmm/intel/ept.c | 5
-rw-r--r-- | sys/amd64/vmm/intel/ept.h | 2
-rw-r--r-- | sys/amd64/vmm/intel/vmcs.c | 44
-rw-r--r-- | sys/amd64/vmm/intel/vmcs.h | 41
-rw-r--r-- | sys/amd64/vmm/intel/vmx.c | 698
-rw-r--r-- | sys/amd64/vmm/intel/vmx.h | 29
-rw-r--r-- | sys/amd64/vmm/intel/vmx_controls.h | 23
-rw-r--r-- | sys/amd64/vmm/intel/vmx_genassym.c | 9
-rw-r--r-- | sys/amd64/vmm/intel/vmx_support.S | 33
-rw-r--r-- | sys/amd64/vmm/io/vioapic.c | 87
-rw-r--r-- | sys/amd64/vmm/io/vlapic.c | 669
-rw-r--r-- | sys/amd64/vmm/io/vlapic.h | 115
-rw-r--r-- | sys/amd64/vmm/io/vlapic_priv.h | 185
-rw-r--r-- | sys/amd64/vmm/vmm.c | 234
-rw-r--r-- | sys/amd64/vmm/vmm_ipi.c | 36
-rw-r--r-- | sys/amd64/vmm/vmm_ipi.h | 8
-rw-r--r-- | sys/amd64/vmm/vmm_lapic.c | 26
-rw-r--r-- | sys/amd64/vmm/vmm_lapic.h | 20
-rw-r--r-- | sys/amd64/vmm/vmm_stat.c | 4
-rw-r--r-- | sys/amd64/vmm/vmm_stat.h | 4
24 files changed, 1753 insertions, 593 deletions
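
One piece worth pulling out of the large vmx.c hunk below: a condensed sketch of how a sender posts a vector to a running vcpu, distilled from vmx_set_intr_ready() and vmx_post_intr() in this diff. The descriptor layout is the one defined in vmx.h; post_interrupt() is a hypothetical name for illustration, not a drop-in copy of the committed code.

    #include <sys/param.h>
    #include <machine/atomic.h>

    /* Posted Interrupt Descriptor (Intel SDM section 29.6); from vmx.h below. */
    struct pir_desc {
            uint64_t        pir[4];         /* one request bit per vector, 0..255 */
            uint64_t        pending;        /* nonzero when any pir[] bit is set */
            uint64_t        unused[3];
    } __aligned(64);

    /*
     * Record 'vector' in the PIR descriptor instead of the virtual APIC page,
     * which cannot be modified while the vcpu is running. Returns 1 if the
     * caller should notify the host cpu running the vcpu (the diff does this
     * with ipi_cpu(hostcpu, pirvec)), or 0 if a notification is already
     * outstanding.
     */
    static int
    post_interrupt(struct pir_desc *pir_desc, int vector)
    {
            uint64_t mask;
            int idx;

            idx = vector / 64;
            mask = 1UL << (vector % 64);
            atomic_set_long(&pir_desc->pir[idx], mask);
            return (atomic_cmpset_long(&pir_desc->pending, 0, 1));
    }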
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 4aa66b5..2b61023 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -1304,6 +1304,7 @@ pmap_invalidate_page_pcid(pmap_t pmap, vm_offset_t va) static __inline void pmap_invalidate_ept(pmap_t pmap) { + int ipinum; sched_pin(); KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), @@ -1328,11 +1329,9 @@ pmap_invalidate_ept(pmap_t pmap) /* * Force the vcpu to exit and trap back into the hypervisor. - * - * XXX this is not optimal because IPI_AST builds a trapframe - * whereas all we need is an 'eoi' followed by 'iret'. */ - ipi_selected(pmap->pm_active, IPI_AST); + ipinum = pmap->pm_flags & PMAP_NESTED_IPIMASK; + ipi_selected(pmap->pm_active, ipinum); sched_unpin(); } diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index 1b5f6a0..e83e07e 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -312,9 +312,10 @@ struct pmap { }; /* flags */ -#define PMAP_PDE_SUPERPAGE (1 << 0) /* supports 2MB superpages */ -#define PMAP_EMULATE_AD_BITS (1 << 1) /* needs A/D bits emulation */ -#define PMAP_SUPPORTS_EXEC_ONLY (1 << 2) /* execute only mappings ok */ +#define PMAP_NESTED_IPIMASK 0xff +#define PMAP_PDE_SUPERPAGE (1 << 8) /* supports 2MB superpages */ +#define PMAP_EMULATE_AD_BITS (1 << 9) /* needs A/D bits emulation */ +#define PMAP_SUPPORTS_EXEC_ONLY (1 << 10) /* execute only mappings ok */ typedef struct pmap *pmap_t; diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 92b767f..fab7e74 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -47,12 +47,12 @@ struct pmap; enum x2apic_state; -typedef int (*vmm_init_func_t)(void); +typedef int (*vmm_init_func_t)(int ipinum); typedef int (*vmm_cleanup_func_t)(void); typedef void (*vmm_resume_func_t)(void); typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap); typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip, - struct pmap *pmap); + struct pmap *pmap, void *rendezvous_cookie); typedef void (*vmi_cleanup_func_t)(void *vmi); typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num, uint64_t *retval); @@ -69,6 +69,8 @@ typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval); typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val); typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max); typedef void (*vmi_vmspace_free)(struct vmspace *vmspace); +typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu); +typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic); struct vmm_ops { vmm_init_func_t init; /* module wide initialization */ @@ -87,6 +89,8 @@ struct vmm_ops { vmi_set_cap_t vmsetcap; vmi_vmspace_alloc vmspace_alloc; vmi_vmspace_free vmspace_free; + vmi_vlapic_init vlapic_init; + vmi_vlapic_cleanup vlapic_cleanup; }; extern struct vmm_ops vmm_ops_intel; @@ -132,6 +136,31 @@ cpuset_t vm_active_cpus(struct vm *vm); struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid); /* + * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'. + * The rendezvous 'func(arg)' is not allowed to do anything that will + * cause the thread to be put to sleep. + * + * If the rendezvous is being initiated from a vcpu context then the + * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1. + * + * The caller cannot hold any locks when initiating the rendezvous. + * + * The implementation of this API may cause vcpus other than those specified + * by 'dest' to be stalled. 
The caller should not rely on any vcpus making + * forward progress when the rendezvous is in progress. + */ +typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg); +void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, + vm_rendezvous_func_t func, void *arg); + +static __inline int +vcpu_rendezvous_pending(void *rendezvous_cookie) +{ + + return (*(uintptr_t *)rendezvous_cookie != 0); +} + +/* * Return 1 if device indicated by bus/slot/func is supposed to be a * pci passthrough device. * @@ -158,7 +187,7 @@ vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu) } void *vcpu_stats(struct vm *vm, int vcpu); -void vcpu_notify_event(struct vm *vm, int vcpuid); +void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr); struct vmspace *vm_get_vmspace(struct vm *vm); int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func); int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func); @@ -267,6 +296,8 @@ enum vm_exitcode { VM_EXITCODE_INST_EMUL, VM_EXITCODE_SPINUP_AP, VM_EXITCODE_SPINDOWN_CPU, + VM_EXITCODE_RENDEZVOUS, + VM_EXITCODE_IOAPIC_EOI, VM_EXITCODE_MAX }; @@ -323,6 +354,9 @@ struct vm_exit { struct { uint64_t rflags; } hlt; + struct { + int vector; + } ioapic_eoi; } u; }; diff --git a/sys/amd64/vmm/amd/amdv.c b/sys/amd64/vmm/amd/amdv.c index 6c87901..00484c7 100644 --- a/sys/amd64/vmm/amd/amdv.c +++ b/sys/amd64/vmm/amd/amdv.c @@ -38,7 +38,7 @@ __FBSDID("$FreeBSD$"); #include "io/iommu.h" static int -amdv_init(void) +amdv_init(int ipinum) { printf("amdv_init: not implemented\n"); @@ -67,7 +67,7 @@ amdv_vminit(struct vm *vm, struct pmap *pmap) } static int -amdv_vmrun(void *arg, int vcpu, register_t rip, struct pmap *pmap) +amdv_vmrun(void *arg, int vcpu, register_t rip, struct pmap *pmap, void *cookie) { printf("amdv_vmrun: not implemented\n"); @@ -155,6 +155,20 @@ amdv_vmspace_free(struct vmspace *vmspace) return; } +static struct vlapic * +amdv_vlapic_init(void *arg, int vcpuid) +{ + + panic("amdv_vlapic_init: not implmented"); +} + +static void +amdv_vlapic_cleanup(void *arg, struct vlapic *vlapic) +{ + + panic("amdv_vlapic_cleanup: not implemented"); +} + struct vmm_ops vmm_ops_amd = { amdv_init, amdv_cleanup, @@ -171,6 +185,8 @@ struct vmm_ops vmm_ops_amd = { amdv_setcap, amdv_vmspace_alloc, amdv_vmspace_free, + amdv_vlapic_init, + amdv_vlapic_cleanup, }; static int diff --git a/sys/amd64/vmm/intel/ept.c b/sys/amd64/vmm/intel/ept.c index 18e90f3..5f6c4d0 100644 --- a/sys/amd64/vmm/intel/ept.c +++ b/sys/amd64/vmm/intel/ept.c @@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm.h> #include "vmx_cpufunc.h" +#include "vmm_ipi.h" #include "vmx_msr.h" #include "ept.h" @@ -76,7 +77,7 @@ SYSCTL_INT(_hw_vmm_ept, OID_AUTO, pmap_flags, CTLFLAG_RD, &ept_pmap_flags, 0, NULL); int -ept_init(void) +ept_init(int ipinum) { int use_hw_ad_bits, use_superpages, use_exec_only; uint64_t cap; @@ -98,6 +99,8 @@ ept_init(void) !INVEPT_ALL_TYPES_SUPPORTED(cap)) return (EINVAL); + ept_pmap_flags = ipinum & PMAP_NESTED_IPIMASK; + use_superpages = 1; TUNABLE_INT_FETCH("hw.vmm.ept.use_superpages", &use_superpages); if (use_superpages && EPT_PDE_SUPERPAGE(cap)) diff --git a/sys/amd64/vmm/intel/ept.h b/sys/amd64/vmm/intel/ept.h index dfd3a44..1393e46 100644 --- a/sys/amd64/vmm/intel/ept.h +++ b/sys/amd64/vmm/intel/ept.h @@ -31,7 +31,7 @@ struct vmx; -int ept_init(void); +int ept_init(int ipinum); void ept_invalidate_mappings(u_long eptp); struct vmspace *ept_vmspace_alloc(vm_offset_t min, vm_offset_t max); void ept_vmspace_free(struct vmspace 
*vmspace); diff --git a/sys/amd64/vmm/intel/vmcs.c b/sys/amd64/vmm/intel/vmcs.c index 980eac1..1ddefe0 100644 --- a/sys/amd64/vmm/intel/vmcs.c +++ b/sys/amd64/vmm/intel/vmcs.c @@ -315,11 +315,7 @@ done: } int -vmcs_set_defaults(struct vmcs *vmcs, - u_long host_rip, u_long host_rsp, uint64_t eptp, - uint32_t pinbased_ctls, uint32_t procbased_ctls, - uint32_t procbased_ctls2, uint32_t exit_ctls, - uint32_t entry_ctls, u_long msr_bitmap, uint16_t vpid) +vmcs_init(struct vmcs *vmcs) { int error, codesel, datasel, tsssel; u_long cr0, cr4, efer; @@ -335,22 +331,6 @@ vmcs_set_defaults(struct vmcs *vmcs, */ VMPTRLD(vmcs); - /* - * Load the VMX controls - */ - if ((error = vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls)) != 0) - goto done; - if ((error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls)) != 0) - goto done; - if ((error = vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2)) != 0) - goto done; - if ((error = vmwrite(VMCS_EXIT_CTLS, exit_ctls)) != 0) - goto done; - if ((error = vmwrite(VMCS_ENTRY_CTLS, entry_ctls)) != 0) - goto done; - - /* Guest state */ - /* Initialize guest IA32_PAT MSR with the default value */ pat = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | @@ -422,23 +402,7 @@ vmcs_set_defaults(struct vmcs *vmcs, goto done; /* instruction pointer */ - if ((error = vmwrite(VMCS_HOST_RIP, host_rip)) != 0) - goto done; - - /* stack pointer */ - if ((error = vmwrite(VMCS_HOST_RSP, host_rsp)) != 0) - goto done; - - /* eptp */ - if ((error = vmwrite(VMCS_EPTP, eptp)) != 0) - goto done; - - /* vpid */ - if ((error = vmwrite(VMCS_VPID, vpid)) != 0) - goto done; - - /* msr bitmap */ - if ((error = vmwrite(VMCS_MSR_BITMAP, msr_bitmap)) != 0) + if ((error = vmwrite(VMCS_HOST_RIP, (u_long)vmx_exit_guest)) != 0) goto done; /* exception bitmap */ @@ -509,7 +473,7 @@ DB_SHOW_COMMAND(vmcs, db_show_vmcs) switch (exit & 0x8000ffff) { case EXIT_REASON_EXCEPTION: case EXIT_REASON_EXT_INTR: - val = vmcs_read(VMCS_EXIT_INTERRUPTION_INFO); + val = vmcs_read(VMCS_EXIT_INTR_INFO); db_printf("Interrupt Type: "); switch (val >> 8 & 0x7) { case 0: @@ -531,7 +495,7 @@ DB_SHOW_COMMAND(vmcs, db_show_vmcs) db_printf(" Vector: %lu", val & 0xff); if (val & 0x800) db_printf(" Error Code: %lx", - vmcs_read(VMCS_EXIT_INTERRUPTION_ERROR)); + vmcs_read(VMCS_EXIT_INTR_ERRCODE)); db_printf("\n"); break; case EXIT_REASON_EPT_FAULT: diff --git a/sys/amd64/vmm/intel/vmcs.h b/sys/amd64/vmm/intel/vmcs.h index b1e2883..fa03826 100644 --- a/sys/amd64/vmm/intel/vmcs.h +++ b/sys/amd64/vmm/intel/vmcs.h @@ -46,12 +46,7 @@ struct msr_entry { }; int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count); -int vmcs_set_defaults(struct vmcs *vmcs, u_long host_rip, u_long host_rsp, - uint64_t eptp, - uint32_t pinbased_ctls, uint32_t procbased_ctls, - uint32_t procbased_ctls2, uint32_t exit_ctls, - uint32_t entry_ctls, u_long msr_bitmap, - uint16_t vpid); +int vmcs_init(struct vmcs *vmcs); int vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *rv); int vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val); int vmcs_getdesc(struct vmcs *vmcs, int ident, @@ -102,6 +97,7 @@ vmcs_write(uint32_t encoding, uint64_t val) /* 16-bit control fields */ #define VMCS_VPID 0x00000000 +#define VMCS_PIR_VECTOR 0x00000002 /* 16-bit guest-state fields */ #define VMCS_GUEST_ES_SELECTOR 0x00000800 @@ -112,6 +108,7 @@ vmcs_write(uint32_t encoding, uint64_t val) #define VMCS_GUEST_GS_SELECTOR 0x0000080A #define VMCS_GUEST_LDTR_SELECTOR 0x0000080C #define VMCS_GUEST_TR_SELECTOR 0x0000080E +#define 
VMCS_GUEST_INTR_STATUS 0x00000810 /* 16-bit host-state fields */ #define VMCS_HOST_ES_SELECTOR 0x00000C00 @@ -133,7 +130,13 @@ vmcs_write(uint32_t encoding, uint64_t val) #define VMCS_TSC_OFFSET 0x00002010 #define VMCS_VIRTUAL_APIC 0x00002012 #define VMCS_APIC_ACCESS 0x00002014 +#define VMCS_PIR_DESC 0x00002016 #define VMCS_EPTP 0x0000201A +#define VMCS_EOI_EXIT0 0x0000201C +#define VMCS_EOI_EXIT1 0x0000201E +#define VMCS_EOI_EXIT2 0x00002020 +#define VMCS_EOI_EXIT3 0x00002022 +#define VMCS_EOI_EXIT(vector) (VMCS_EOI_EXIT0 + ((vector) / 64) * 2) /* 64-bit read-only fields */ #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 @@ -177,8 +180,8 @@ vmcs_write(uint32_t encoding, uint64_t val) /* 32-bit read-only data fields */ #define VMCS_INSTRUCTION_ERROR 0x00004400 #define VMCS_EXIT_REASON 0x00004402 -#define VMCS_EXIT_INTERRUPTION_INFO 0x00004404 -#define VMCS_EXIT_INTERRUPTION_ERROR 0x00004406 +#define VMCS_EXIT_INTR_INFO 0x00004404 +#define VMCS_EXIT_INTR_ERRCODE 0x00004406 #define VMCS_IDT_VECTORING_INFO 0x00004408 #define VMCS_IDT_VECTORING_ERROR 0x0000440A #define VMCS_EXIT_INSTRUCTION_LENGTH 0x0000440C @@ -315,7 +318,8 @@ vmcs_write(uint32_t encoding, uint64_t val) #define EXIT_REASON_PAUSE 40 #define EXIT_REASON_MCE 41 #define EXIT_REASON_TPR 43 -#define EXIT_REASON_APIC 44 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_VIRTUALIZED_EOI 45 #define EXIT_REASON_GDTR_IDTR 46 #define EXIT_REASON_LDTR_TR 47 #define EXIT_REASON_EPT_FAULT 48 @@ -326,13 +330,15 @@ vmcs_write(uint32_t encoding, uint64_t val) #define EXIT_REASON_INVVPID 53 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_APIC_WRITE 56 /* * VMCS interrupt information fields */ -#define VMCS_INTERRUPTION_INFO_VALID (1U << 31) -#define VMCS_INTERRUPTION_INFO_HW_INTR (0 << 8) -#define VMCS_INTERRUPTION_INFO_NMI (2 << 8) +#define VMCS_INTR_INFO_VALID (1U << 31) +#define VMCS_INTR_INFO_TYPE(info) (((info) >> 8) & 0x7) +#define VMCS_INTR_INFO_HW_INTR (0 << 8) +#define VMCS_INTR_INFO_NMI (2 << 8) /* * VMCS IDT-Vectoring information fields @@ -365,4 +371,15 @@ vmcs_write(uint32_t encoding, uint64_t val) #define EPT_VIOLATION_GLA_VALID (1UL << 7) #define EPT_VIOLATION_XLAT_VALID (1UL << 8) +/* + * Exit qualification for APIC-access VM exit + */ +#define APIC_ACCESS_OFFSET(qual) ((qual) & 0xFFF) +#define APIC_ACCESS_TYPE(qual) (((qual) >> 12) & 0xF) + +/* + * Exit qualification for APIC-write VM exit + */ +#define APIC_WRITE_OFFSET(qual) ((qual) & 0xFFF) + #endif diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index bcaed4e..b79d174 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -45,15 +45,18 @@ __FBSDID("$FreeBSD$"); #include <machine/cpufunc.h> #include <machine/md_var.h> #include <machine/segments.h> +#include <machine/smp.h> #include <machine/specialreg.h> #include <machine/vmparam.h> #include <machine/vmm.h> #include "vmm_host.h" -#include "vmm_lapic.h" +#include "vmm_ipi.h" #include "vmm_msr.h" #include "vmm_ktr.h" #include "vmm_stat.h" +#include "vlapic.h" +#include "vlapic_priv.h" #include "vmx_msr.h" #include "ept.h" @@ -92,6 +95,7 @@ __FBSDID("$FreeBSD$"); #define VM_EXIT_CTLS_ONE_SETTING \ (VM_EXIT_CTLS_ONE_SETTING_NO_PAT | \ + VM_EXIT_ACKNOWLEDGE_INTERRUPT | \ VM_EXIT_SAVE_PAT | \ VM_EXIT_LOAD_PAT) #define VM_EXIT_CTLS_ZERO_SETTING VM_EXIT_SAVE_DEBUG_CONTROLS @@ -112,7 +116,8 @@ __FBSDID("$FreeBSD$"); #define HANDLED 1 #define UNHANDLED 0 -MALLOC_DEFINE(M_VMX, "vmx", "vmx"); +static MALLOC_DEFINE(M_VMX, "vmx", "vmx"); +static 
MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic"); SYSCTL_DECL(_hw_vmm); SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW, NULL, NULL); @@ -164,12 +169,33 @@ static int cap_pause_exit; static int cap_unrestricted_guest; static int cap_monitor_trap; static int cap_invpcid; - + +static int virtual_interrupt_delivery; +SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, virtual_interrupt_delivery, CTLFLAG_RD, + &virtual_interrupt_delivery, 0, "APICv virtual interrupt delivery support"); + +static int posted_interrupts; +SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, posted_interrupts, CTLFLAG_RD, + &posted_interrupts, 0, "APICv posted interrupt support"); + +static int pirvec; +SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, posted_interrupt_vector, CTLFLAG_RD, + &pirvec, 0, "APICv posted interrupt vector"); + static struct unrhdr *vpid_unr; static u_int vpid_alloc_failed; SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD, &vpid_alloc_failed, 0, NULL); +/* + * Use the last page below 4GB as the APIC access address. This address is + * occupied by the boot firmware so it is guaranteed that it will not conflict + * with a page in system memory. + */ +#define APIC_ACCESS_ADDRESS 0xFFFFF000 + +static void vmx_inject_pir(struct vlapic *vlapic); + #ifdef KTR static const char * exit_reason_to_str(int reason) @@ -259,8 +285,8 @@ exit_reason_to_str(int reason) return "mce"; case EXIT_REASON_TPR: return "tpr"; - case EXIT_REASON_APIC: - return "apic"; + case EXIT_REASON_APIC_ACCESS: + return "apic-access"; case EXIT_REASON_GDTR_IDTR: return "gdtridtr"; case EXIT_REASON_LDTR_TR: @@ -281,6 +307,8 @@ exit_reason_to_str(int reason) return "wbinvd"; case EXIT_REASON_XSETBV: return "xsetbv"; + case EXIT_REASON_APIC_WRITE: + return "apic-write"; default: snprintf(reasonbuf, sizeof(reasonbuf), "%d", reason); return (reasonbuf); @@ -424,6 +452,9 @@ vmx_disable(void *arg __unused) static int vmx_cleanup(void) { + + if (pirvec != 0) + vmm_ipi_free(pirvec); if (vpid_unr != NULL) { delete_unrhdr(vpid_unr); @@ -457,11 +488,11 @@ vmx_restore(void) } static int -vmx_init(void) +vmx_init(int ipinum) { - int error; + int error, use_tpr_shadow; uint64_t fixed0, fixed1, feature_control; - uint32_t tmp; + uint32_t tmp, procbased2_vid_bits; /* CPUID.1:ECX[bit 5] must be 1 for processor to support VMX */ if (!(cpu_feature2 & CPUID2_VMX)) { @@ -595,9 +626,58 @@ vmx_init(void) MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0, &tmp) == 0); + /* + * Check support for virtual interrupt delivery. + */ + procbased2_vid_bits = (PROCBASED2_VIRTUALIZE_APIC_ACCESSES | + PROCBASED2_VIRTUALIZE_X2APIC_MODE | + PROCBASED2_APIC_REGISTER_VIRTUALIZATION | + PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY); + + use_tpr_shadow = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, + MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0, + &tmp) == 0); + + error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, + procbased2_vid_bits, 0, &tmp); + if (error == 0 && use_tpr_shadow) { + virtual_interrupt_delivery = 1; + TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_vid", + &virtual_interrupt_delivery); + } + + if (virtual_interrupt_delivery) { + procbased_ctls |= PROCBASED_USE_TPR_SHADOW; + procbased_ctls2 |= procbased2_vid_bits; + procbased_ctls2 &= ~PROCBASED2_VIRTUALIZE_X2APIC_MODE; + + /* + * Check for Posted Interrupts only if Virtual Interrupt + * Delivery is enabled. 
+ */ + error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS, + MSR_VMX_TRUE_PINBASED_CTLS, PINBASED_POSTED_INTERRUPT, 0, + &tmp); + if (error == 0) { + pirvec = vmm_ipi_alloc(); + if (pirvec == 0) { + if (bootverbose) { + printf("vmx_init: unable to allocate " + "posted interrupt vector\n"); + } + } else { + posted_interrupts = 1; + TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_pir", + &posted_interrupts); + } + } + } + + if (posted_interrupts) + pinbased_ctls |= PINBASED_POSTED_INTERRUPT; /* Initialize EPT */ - error = ept_init(); + error = ept_init(ipinum); if (error) { printf("vmx_init: ept initialization failed (%d)\n", error); return (error); @@ -638,6 +718,31 @@ vmx_init(void) return (0); } +static void +vmx_trigger_hostintr(int vector) +{ + uintptr_t func; + struct gate_descriptor *gd; + + gd = &idt[vector]; + + KASSERT(vector >= 32 && vector <= 255, ("vmx_trigger_hostintr: " + "invalid vector %d", vector)); + KASSERT(gd->gd_p == 1, ("gate descriptor for vector %d not present", + vector)); + KASSERT(gd->gd_type == SDT_SYSIGT, ("gate descriptor for vector %d " + "has invalid type %d", vector, gd->gd_type)); + KASSERT(gd->gd_dpl == SEL_KPL, ("gate descriptor for vector %d " + "has invalid dpl %d", vector, gd->gd_dpl)); + KASSERT(gd->gd_selector == GSEL(GCODE_SEL, SEL_KPL), ("gate descriptor " + "for vector %d has invalid selector %d", vector, gd->gd_selector)); + KASSERT(gd->gd_ist == 0, ("gate descriptor for vector %d has invalid " + "IST %d", vector, gd->gd_ist)); + + func = ((long)gd->gd_hioffset << 16 | gd->gd_looffset); + vmx_call_isr(func); +} + static int vmx_setup_cr_shadow(int which, struct vmcs *vmcs, uint32_t initial) { @@ -676,6 +781,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap) uint16_t vpid[VM_MAXCPU]; int i, error, guest_msr_count; struct vmx *vmx; + struct vmcs *vmcs; vmx = malloc(sizeof(struct vmx), M_VMX, M_WAITOK | M_ZERO); if ((uintptr_t)vmx & PAGE_MASK) { @@ -740,27 +846,52 @@ vmx_vminit(struct vm *vm, pmap_t pmap) vpid_alloc(vpid, VM_MAXCPU); + if (virtual_interrupt_delivery) { + error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE, + APIC_ACCESS_ADDRESS); + /* XXX this should really return an error to the caller */ + KASSERT(error == 0, ("vm_map_mmio(apicbase) error %d", error)); + } + for (i = 0; i < VM_MAXCPU; i++) { - vmx->vmcs[i].identifier = vmx_revision(); - error = vmclear(&vmx->vmcs[i]); + vmcs = &vmx->vmcs[i]; + vmcs->identifier = vmx_revision(); + error = vmclear(vmcs); if (error != 0) { panic("vmx_vminit: vmclear error %d on vcpu %d\n", error, i); } - error = vmcs_set_defaults(&vmx->vmcs[i], - (u_long)vmx_exit_guest, - (u_long)&vmx->ctx[i], - vmx->eptp, - pinbased_ctls, - procbased_ctls, - procbased_ctls2, - exit_ctls, entry_ctls, - vtophys(vmx->msr_bitmap), - vpid[i]); + error = vmcs_init(vmcs); + KASSERT(error == 0, ("vmcs_init error %d", error)); - if (error != 0) - panic("vmx_vminit: vmcs_set_defaults error %d", error); + VMPTRLD(vmcs); + error = 0; + error += vmwrite(VMCS_HOST_RSP, (u_long)&vmx->ctx[i]); + error += vmwrite(VMCS_EPTP, vmx->eptp); + error += vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls); + error += vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls); + error += vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2); + error += vmwrite(VMCS_EXIT_CTLS, exit_ctls); + error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls); + error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap)); + error += vmwrite(VMCS_VPID, vpid[i]); + if (virtual_interrupt_delivery) { + error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); + error += vmwrite(VMCS_VIRTUAL_APIC, + 
vtophys(&vmx->apic_page[i])); + error += vmwrite(VMCS_EOI_EXIT0, 0); + error += vmwrite(VMCS_EOI_EXIT1, 0); + error += vmwrite(VMCS_EOI_EXIT2, 0); + error += vmwrite(VMCS_EOI_EXIT3, 0); + } + if (posted_interrupts) { + error += vmwrite(VMCS_PIR_VECTOR, pirvec); + error += vmwrite(VMCS_PIR_DESC, + vtophys(&vmx->pir_desc[i])); + } + VMCLEAR(vmcs); + KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs")); vmx->cap[i].set = 0; vmx->cap[i].proc_ctls = procbased_ctls; @@ -771,9 +902,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap) msr_save_area_init(vmx->guest_msrs[i], &guest_msr_count); - error = vmcs_set_msr_save(&vmx->vmcs[i], - vtophys(vmx->guest_msrs[i]), - guest_msr_count); + error = vmcs_set_msr_save(vmcs, vtophys(vmx->guest_msrs[i]), + guest_msr_count); if (error != 0) panic("vmcs_set_msr_save error %d", error); @@ -783,16 +913,15 @@ vmx_vminit(struct vm *vm, pmap_t pmap) * CR0 - 0x60000010 * CR4 - 0 */ - error = vmx_setup_cr0_shadow(&vmx->vmcs[i], 0x60000010); + error = vmx_setup_cr0_shadow(vmcs, 0x60000010); if (error != 0) panic("vmx_setup_cr0_shadow %d", error); - error = vmx_setup_cr4_shadow(&vmx->vmcs[i], 0); + error = vmx_setup_cr4_shadow(vmcs, 0); if (error != 0) panic("vmx_setup_cr4_shadow %d", error); vmx->ctx[i].pmap = pmap; - vmx->ctx[i].eptp = vmx->eptp; } return (vmx); @@ -840,20 +969,20 @@ vmx_astpending_trace(struct vmx *vmx, int vcpu, uint64_t rip) #endif } +static VMM_STAT_INTEL(VCPU_INVVPID_SAVED, "Number of vpid invalidations saved"); + static void -vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu) +vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) { - int lastcpu; struct vmxstate *vmxstate; - struct invvpid_desc invvpid_desc = { 0 }; + struct invvpid_desc invvpid_desc; vmxstate = &vmx->state[vcpu]; - lastcpu = vmxstate->lastcpu; - vmxstate->lastcpu = curcpu; - - if (lastcpu == curcpu) + if (vmxstate->lastcpu == curcpu) return; + vmxstate->lastcpu = curcpu; + vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1); vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase()); @@ -876,8 +1005,20 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu) * for "all" EP4TAs. */ if (vmxstate->vpid != 0) { - invvpid_desc.vpid = vmxstate->vpid; - invvpid(INVVPID_TYPE_SINGLE_CONTEXT, invvpid_desc); + if (pmap->pm_eptgen == vmx->eptgen[curcpu]) { + invvpid_desc._res1 = 0; + invvpid_desc._res2 = 0; + invvpid_desc.vpid = vmxstate->vpid; + invvpid(INVVPID_TYPE_SINGLE_CONTEXT, invvpid_desc); + } else { + /* + * The invvpid can be skipped if an invept is going to + * be performed before entering the guest. The invept + * will invalidate combined mappings tagged with + * 'vmx->eptp' for all vpids. + */ + vmm_stat_incr(vmx->vm, vcpu, VCPU_INVVPID_SAVED, 1); + } } } @@ -935,7 +1076,7 @@ vmx_inject_nmi(struct vmx *vmx, int vcpu) * Inject the virtual NMI. The vector must be the NMI IDT entry * or the VMCS entry check will fail. */ - info = VMCS_INTERRUPTION_INFO_NMI | VMCS_INTERRUPTION_INFO_VALID; + info = VMCS_INTR_INFO_NMI | VMCS_INTR_INFO_VALID; info |= IDT_NMI; vmcs_write(VMCS_ENTRY_INTR_INFO, info); @@ -957,7 +1098,7 @@ nmiblocked: } static void -vmx_inject_interrupts(struct vmx *vmx, int vcpu) +vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic) { int vector; uint64_t info, rflags, interruptibility; @@ -973,7 +1114,7 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu) * because of a pending AST. 
*/ info = vmcs_read(VMCS_ENTRY_INTR_INFO); - if (info & VMCS_INTERRUPTION_INFO_VALID) + if (info & VMCS_INTR_INFO_VALID) return; /* @@ -982,9 +1123,13 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu) if (vmx_inject_nmi(vmx, vcpu)) return; + if (virtual_interrupt_delivery) { + vmx_inject_pir(vlapic); + return; + } + /* Ask the local apic for a vector to inject */ - vector = lapic_pending_intr(vmx->vm, vcpu); - if (vector < 0) + if (!vlapic_pending_intr(vlapic, &vector)) return; if (vector < 32 || vector > 255) @@ -1000,12 +1145,12 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu) goto cantinject; /* Inject the interrupt */ - info = VMCS_INTERRUPTION_INFO_HW_INTR | VMCS_INTERRUPTION_INFO_VALID; + info = VMCS_INTR_INFO_HW_INTR | VMCS_INTR_INFO_VALID; info |= vector; vmcs_write(VMCS_ENTRY_INTR_INFO, info); /* Update the Local APIC ISR */ - lapic_intr_accepted(vmx->vm, vcpu, vector); + vlapic_intr_accepted(vlapic, vector); VCPU_CTR1(vmx->vm, vcpu, "Injecting hwintr at vector %d", vector); @@ -1175,11 +1320,141 @@ ept_emulation_fault(uint64_t ept_qual) } static int +vmx_handle_apic_write(struct vlapic *vlapic, uint64_t qual) +{ + int error, handled, offset; + bool retu; + + if (!virtual_interrupt_delivery) + return (UNHANDLED); + + handled = 1; + offset = APIC_WRITE_OFFSET(qual); + switch (offset) { + case APIC_OFFSET_ID: + vlapic_id_write_handler(vlapic); + break; + case APIC_OFFSET_LDR: + vlapic_ldr_write_handler(vlapic); + break; + case APIC_OFFSET_DFR: + vlapic_dfr_write_handler(vlapic); + break; + case APIC_OFFSET_SVR: + vlapic_svr_write_handler(vlapic); + break; + case APIC_OFFSET_ESR: + vlapic_esr_write_handler(vlapic); + break; + case APIC_OFFSET_ICR_LOW: + retu = false; + error = vlapic_icrlo_write_handler(vlapic, &retu); + if (error != 0 || retu) + handled = 0; + break; + case APIC_OFFSET_CMCI_LVT: + case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: + vlapic_lvt_write_handler(vlapic, offset); + break; + case APIC_OFFSET_TIMER_ICR: + vlapic_icrtmr_write_handler(vlapic); + break; + case APIC_OFFSET_TIMER_DCR: + vlapic_dcr_write_handler(vlapic); + break; + default: + handled = 0; + break; + } + return (handled); +} + +static bool +apic_access_fault(uint64_t gpa) +{ + + if (virtual_interrupt_delivery && + (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE)) + return (true); + else + return (false); +} + +static int +vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) +{ + uint64_t qual; + int access_type, offset, allowed; + + if (!virtual_interrupt_delivery) + return (UNHANDLED); + + qual = vmexit->u.vmx.exit_qualification; + access_type = APIC_ACCESS_TYPE(qual); + offset = APIC_ACCESS_OFFSET(qual); + + allowed = 0; + if (access_type == 0) { + /* + * Read data access to the following registers is expected. + */ + switch (offset) { + case APIC_OFFSET_APR: + case APIC_OFFSET_PPR: + case APIC_OFFSET_RRR: + case APIC_OFFSET_CMCI_LVT: + case APIC_OFFSET_TIMER_CCR: + allowed = 1; + break; + default: + break; + } + } else if (access_type == 1) { + /* + * Write data access to the following registers is expected. + */ + switch (offset) { + case APIC_OFFSET_VER: + case APIC_OFFSET_APR: + case APIC_OFFSET_PPR: + case APIC_OFFSET_RRR: + case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: + case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: + case APIC_OFFSET_IRR0 ... 
APIC_OFFSET_IRR7: + case APIC_OFFSET_CMCI_LVT: + case APIC_OFFSET_TIMER_CCR: + allowed = 1; + break; + default: + break; + } + } + + if (allowed) { + vmexit->exitcode = VM_EXITCODE_INST_EMUL; + vmexit->u.inst_emul.gpa = DEFAULT_APIC_BASE + offset; + vmexit->u.inst_emul.gla = VIE_INVALID_GLA; + vmexit->u.inst_emul.cr3 = vmcs_guest_cr3(); + } + + /* + * Regardless of whether the APIC-access is allowed this handler + * always returns UNHANDLED: + * - if the access is allowed then it is handled by emulating the + * instruction that caused the VM-exit (outside the critical section) + * - if the access is not allowed then it will be converted to an + * exitcode of VM_EXITCODE_VMX and will be dealt with in userland. + */ + return (UNHANDLED); +} + +static int vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) { int error, handled; struct vmxctx *vmxctx; - uint32_t eax, ecx, edx, idtvec_info, idtvec_err, reason; + struct vlapic *vlapic; + uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, reason; uint64_t qual, gpa; bool retu; @@ -1203,7 +1478,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) switch (reason) { case EXIT_REASON_EPT_FAULT: case EXIT_REASON_EPT_MISCONFIG: - case EXIT_REASON_APIC: + case EXIT_REASON_APIC_ACCESS: case EXIT_REASON_TASK_SWITCH: case EXIT_REASON_EXCEPTION: idtvec_info = vmcs_idt_vectoring_info(); @@ -1290,6 +1565,11 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) * host interrupt handler in the VM's softc. We will inject * this virtual interrupt during the subsequent VM enter. */ + intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); + KASSERT((intr_info & VMCS_INTR_INFO_VALID) != 0 && + VMCS_INTR_INFO_TYPE(intr_info) == 0, + ("VM exit interruption info invalid: %#x", intr_info)); + vmx_trigger_hostintr(intr_info & 0xff); /* * This is special. We want to treat this as an 'handled' @@ -1318,24 +1598,42 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx); break; case EXIT_REASON_EPT_FAULT: - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EPT_FAULT, 1); /* * If 'gpa' lies within the address space allocated to * memory then this must be a nested page fault otherwise * this must be an instruction that accesses MMIO space. */ gpa = vmcs_gpa(); - if (vm_mem_allocated(vmx->vm, gpa)) { + if (vm_mem_allocated(vmx->vm, gpa) || apic_access_fault(gpa)) { vmexit->exitcode = VM_EXITCODE_PAGING; vmexit->u.paging.gpa = gpa; vmexit->u.paging.fault_type = ept_fault_type(qual); + vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1); } else if (ept_emulation_fault(qual)) { vmexit->exitcode = VM_EXITCODE_INST_EMUL; vmexit->u.inst_emul.gpa = gpa; vmexit->u.inst_emul.gla = vmcs_gla(); vmexit->u.inst_emul.cr3 = vmcs_guest_cr3(); + vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INST_EMUL, 1); } break; + case EXIT_REASON_VIRTUALIZED_EOI: + vmexit->exitcode = VM_EXITCODE_IOAPIC_EOI; + vmexit->u.ioapic_eoi.vector = qual & 0xFF; + vmexit->inst_length = 0; /* trap-like */ + break; + case EXIT_REASON_APIC_ACCESS: + handled = vmx_handle_apic_access(vmx, vcpu, vmexit); + break; + case EXIT_REASON_APIC_WRITE: + /* + * APIC-write VM exit is trap-like so the %rip is already + * pointing to the next instruction. 
+ */ + vmexit->inst_length = 0; + vlapic = vm_lapic(vmx->vm, vcpu); + handled = vmx_handle_apic_write(vlapic, qual); + break; default: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1); break; @@ -1387,6 +1685,18 @@ vmx_exit_astpending(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) } static __inline int +vmx_exit_rendezvous(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) +{ + + vmexit->rip = vmcs_guest_rip(); + vmexit->inst_length = 0; + vmexit->exitcode = VM_EXITCODE_RENDEZVOUS; + vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RENDEZVOUS, 1); + + return (UNHANDLED); +} + +static __inline int vmx_exit_inst_error(struct vmxctx *vmxctx, int rc, struct vm_exit *vmexit) { @@ -1415,26 +1725,29 @@ vmx_exit_inst_error(struct vmxctx *vmxctx, int rc, struct vm_exit *vmexit) } static int -vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap) +vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap, + void *rendezvous_cookie) { int rc, handled, launched; struct vmx *vmx; + struct vm *vm; struct vmxctx *vmxctx; struct vmcs *vmcs; struct vm_exit *vmexit; + struct vlapic *vlapic; uint64_t rip; uint32_t exit_reason; vmx = arg; + vm = vmx->vm; vmcs = &vmx->vmcs[vcpu]; vmxctx = &vmx->ctx[vcpu]; - vmexit = vm_exitinfo(vmx->vm, vcpu); + vlapic = vm_lapic(vm, vcpu); + vmexit = vm_exitinfo(vm, vcpu); launched = 0; KASSERT(vmxctx->pmap == pmap, ("pmap %p different than ctx pmap %p", pmap, vmxctx->pmap)); - KASSERT(vmxctx->eptp == vmx->eptp, - ("eptp %p different than ctx eptp %#lx", eptp, vmxctx->eptp)); VMPTRLD(vmcs); @@ -1444,12 +1757,12 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap) * from a different process than the one that actually runs it. * * If the life of a virtual machine was spent entirely in the context - * of a single process we could do this once in vmcs_set_defaults(). + * of a single process we could do this once in vmx_vminit(). */ vmcs_write(VMCS_HOST_CR3, rcr3()); vmcs_write(VMCS_GUEST_RIP, startrip); - vmx_set_pcpu_defaults(vmx, vcpu); + vmx_set_pcpu_defaults(vmx, vcpu, pmap); do { /* * Interrupts are disabled from this point on until the @@ -1476,9 +1789,15 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap) break; } - vmx_inject_interrupts(vmx, vcpu); + if (vcpu_rendezvous_pending(rendezvous_cookie)) { + enable_intr(); + handled = vmx_exit_rendezvous(vmx, vcpu, vmexit); + break; + } + + vmx_inject_interrupts(vmx, vcpu, vlapic); vmx_run_trace(vmx, vcpu); - rc = vmx_enter_guest(vmxctx, launched); + rc = vmx_enter_guest(vmxctx, vmx, launched); enable_intr(); @@ -1509,9 +1828,9 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap) } if (!handled) - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_USERSPACE, 1); + vmm_stat_incr(vm, vcpu, VMEXIT_USERSPACE, 1); - VCPU_CTR1(vmx->vm, vcpu, "returning from vmx_run: exitcode %d", + VCPU_CTR1(vm, vcpu, "returning from vmx_run: exitcode %d", vmexit->exitcode); VMCLEAR(vmcs); @@ -1524,6 +1843,9 @@ vmx_vmcleanup(void *arg) int i, error; struct vmx *vmx = arg; + if (virtual_interrupt_delivery) + vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE); + for (i = 0; i < VM_MAXCPU; i++) vpid_free(vmx->state[i].vpid); @@ -1731,11 +2053,11 @@ vmx_inject(void *arg, int vcpu, int type, int vector, uint32_t code, if (error) return (error); - if (info & VMCS_INTERRUPTION_INFO_VALID) + if (info & VMCS_INTR_INFO_VALID) return (EAGAIN); info = vector | (type_map[type] << 8) | (code_valid ? 
1 << 11 : 0); - info |= VMCS_INTERRUPTION_INFO_VALID; + info |= VMCS_INTR_INFO_VALID; error = vmcs_setreg(vmcs, 0, VMCS_IDENT(VMCS_ENTRY_INTR_INFO), info); if (error != 0) return (error); @@ -1887,6 +2209,258 @@ vmx_setcap(void *arg, int vcpu, int type, int val) return (retval); } +struct vlapic_vtx { + struct vlapic vlapic; + struct pir_desc *pir_desc; + struct vmx *vmx; +}; + +#define VMX_CTR_PIR(vm, vcpuid, pir_desc, notify, vector, level, msg) \ +do { \ + VCPU_CTR2(vm, vcpuid, msg " assert %s-triggered vector %d", \ + level ? "level" : "edge", vector); \ + VCPU_CTR1(vm, vcpuid, msg " pir0 0x%016lx", pir_desc->pir[0]); \ + VCPU_CTR1(vm, vcpuid, msg " pir1 0x%016lx", pir_desc->pir[1]); \ + VCPU_CTR1(vm, vcpuid, msg " pir2 0x%016lx", pir_desc->pir[2]); \ + VCPU_CTR1(vm, vcpuid, msg " pir3 0x%016lx", pir_desc->pir[3]); \ + VCPU_CTR1(vm, vcpuid, msg " notify: %s", notify ? "yes" : "no");\ +} while (0) + +/* + * vlapic->ops handlers that utilize the APICv hardware assist described in + * Chapter 29 of the Intel SDM. + */ +static int +vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level) +{ + struct vlapic_vtx *vlapic_vtx; + struct pir_desc *pir_desc; + uint64_t mask; + int idx, notify; + + vlapic_vtx = (struct vlapic_vtx *)vlapic; + pir_desc = vlapic_vtx->pir_desc; + + /* + * Keep track of interrupt requests in the PIR descriptor. This is + * because the virtual APIC page pointed to by the VMCS cannot be + * modified if the vcpu is running. + */ + idx = vector / 64; + mask = 1UL << (vector % 64); + atomic_set_long(&pir_desc->pir[idx], mask); + notify = atomic_cmpset_long(&pir_desc->pending, 0, 1); + + VMX_CTR_PIR(vlapic->vm, vlapic->vcpuid, pir_desc, notify, vector, + level, "vmx_set_intr_ready"); + return (notify); +} + +static int +vmx_pending_intr(struct vlapic *vlapic, int *vecptr) +{ + struct vlapic_vtx *vlapic_vtx; + struct pir_desc *pir_desc; + struct LAPIC *lapic; + uint64_t pending, pirval; + uint32_t ppr, vpr; + int i; + + /* + * This function is only expected to be called from the 'HLT' exit + * handler which does not care about the vector that is pending. + */ + KASSERT(vecptr == NULL, ("vmx_pending_intr: vecptr must be NULL")); + + vlapic_vtx = (struct vlapic_vtx *)vlapic; + pir_desc = vlapic_vtx->pir_desc; + + pending = atomic_load_acq_long(&pir_desc->pending); + if (!pending) + return (0); /* common case */ + + /* + * If there is an interrupt pending then it will be recognized only + * if its priority is greater than the processor priority. + * + * Special case: if the processor priority is zero then any pending + * interrupt will be recognized. 
+ */ + lapic = vlapic->apic_page; + ppr = lapic->ppr & 0xf0; + if (ppr == 0) + return (1); + + VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "HLT with non-zero PPR %d", + lapic->ppr); + + for (i = 3; i >= 0; i--) { + pirval = pir_desc->pir[i]; + if (pirval != 0) { + vpr = (i * 64 + flsl(pirval) - 1) & 0xf0; + return (vpr > ppr); + } + } + return (0); +} + +static void +vmx_intr_accepted(struct vlapic *vlapic, int vector) +{ + + panic("vmx_intr_accepted: not expected to be called"); +} + +static void +vmx_set_tmr(struct vlapic *vlapic, int vector, bool level) +{ + struct vlapic_vtx *vlapic_vtx; + struct vmx *vmx; + struct vmcs *vmcs; + uint64_t mask, val; + + KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); + KASSERT(!vcpu_is_running(vlapic->vm, vlapic->vcpuid, NULL), + ("vmx_set_tmr: vcpu cannot be running")); + + vlapic_vtx = (struct vlapic_vtx *)vlapic; + vmx = vlapic_vtx->vmx; + vmcs = &vmx->vmcs[vlapic->vcpuid]; + mask = 1UL << (vector % 64); + + VMPTRLD(vmcs); + val = vmcs_read(VMCS_EOI_EXIT(vector)); + if (level) + val |= mask; + else + val &= ~mask; + vmcs_write(VMCS_EOI_EXIT(vector), val); + VMCLEAR(vmcs); +} + +static void +vmx_post_intr(struct vlapic *vlapic, int hostcpu) +{ + + ipi_cpu(hostcpu, pirvec); +} + +/* + * Transfer the pending interrupts in the PIR descriptor to the IRR + * in the virtual APIC page. + */ +static void +vmx_inject_pir(struct vlapic *vlapic) +{ + struct vlapic_vtx *vlapic_vtx; + struct pir_desc *pir_desc; + struct LAPIC *lapic; + uint64_t val, pirval; + int rvi, pirbase; + uint16_t intr_status_old, intr_status_new; + + vlapic_vtx = (struct vlapic_vtx *)vlapic; + pir_desc = vlapic_vtx->pir_desc; + if (atomic_cmpset_long(&pir_desc->pending, 1, 0) == 0) { + VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: " + "no posted interrupt pending"); + return; + } + + pirval = 0; + lapic = vlapic->apic_page; + + val = atomic_readandclear_long(&pir_desc->pir[0]); + if (val != 0) { + lapic->irr0 |= val; + lapic->irr1 |= val >> 32; + pirbase = 0; + pirval = val; + } + + val = atomic_readandclear_long(&pir_desc->pir[1]); + if (val != 0) { + lapic->irr2 |= val; + lapic->irr3 |= val >> 32; + pirbase = 64; + pirval = val; + } + + val = atomic_readandclear_long(&pir_desc->pir[2]); + if (val != 0) { + lapic->irr4 |= val; + lapic->irr5 |= val >> 32; + pirbase = 128; + pirval = val; + } + + val = atomic_readandclear_long(&pir_desc->pir[3]); + if (val != 0) { + lapic->irr6 |= val; + lapic->irr7 |= val >> 32; + pirbase = 192; + pirval = val; + } + VLAPIC_CTR_IRR(vlapic, "vmx_inject_pir"); + + /* + * Update RVI so the processor can evaluate pending virtual + * interrupts on VM-entry. 
+ */ + if (pirval != 0) { + rvi = pirbase + flsl(pirval) - 1; + intr_status_old = vmcs_read(VMCS_GUEST_INTR_STATUS); + intr_status_new = (intr_status_old & 0xFF00) | rvi; + if (intr_status_new > intr_status_old) { + vmcs_write(VMCS_GUEST_INTR_STATUS, intr_status_new); + VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: " + "guest_intr_status changed from 0x%04x to 0x%04x", + intr_status_old, intr_status_new); + } + } +} + +static struct vlapic * +vmx_vlapic_init(void *arg, int vcpuid) +{ + struct vmx *vmx; + struct vlapic *vlapic; + struct vlapic_vtx *vlapic_vtx; + + vmx = arg; + + vlapic = malloc(sizeof(struct vlapic_vtx), M_VLAPIC, M_WAITOK | M_ZERO); + vlapic->vm = vmx->vm; + vlapic->vcpuid = vcpuid; + vlapic->apic_page = (struct LAPIC *)&vmx->apic_page[vcpuid]; + + vlapic_vtx = (struct vlapic_vtx *)vlapic; + vlapic_vtx->pir_desc = &vmx->pir_desc[vcpuid]; + vlapic_vtx->vmx = vmx; + + if (virtual_interrupt_delivery) { + vlapic->ops.set_intr_ready = vmx_set_intr_ready; + vlapic->ops.pending_intr = vmx_pending_intr; + vlapic->ops.intr_accepted = vmx_intr_accepted; + vlapic->ops.set_tmr = vmx_set_tmr; + } + + if (posted_interrupts) + vlapic->ops.post_intr = vmx_post_intr; + + vlapic_init(vlapic); + + return (vlapic); +} + +static void +vmx_vlapic_cleanup(void *arg, struct vlapic *vlapic) +{ + + vlapic_cleanup(vlapic); + free(vlapic, M_VLAPIC); +} + struct vmm_ops vmm_ops_intel = { vmx_init, vmx_cleanup, @@ -1903,4 +2477,6 @@ struct vmm_ops vmm_ops_intel = { vmx_setcap, ept_vmspace_alloc, ept_vmspace_free, + vmx_vlapic_init, + vmx_vlapic_cleanup, }; diff --git a/sys/amd64/vmm/intel/vmx.h b/sys/amd64/vmm/intel/vmx.h index 67ef631..80bfd72 100644 --- a/sys/amd64/vmm/intel/vmx.h +++ b/sys/amd64/vmm/intel/vmx.h @@ -64,16 +64,13 @@ struct vmxctx { /* * XXX todo debug registers and fpu state */ - - int inst_fail_status; - long eptgen[MAXCPU]; /* cached pmap->pm_eptgen */ + int inst_fail_status; /* - * The 'eptp' and the 'pmap' do not change during the lifetime of - * the VM so it is safe to keep a copy in each vcpu's vmxctx. + * The pmap needs to be deactivated in vmx_exit_guest() + * so keep a copy of the 'pmap' in each vmxctx. 
*/ - vm_paddr_t eptp; struct pmap *pmap; }; @@ -88,27 +85,45 @@ struct vmxstate { uint16_t vpid; }; +struct apic_page { + uint32_t reg[PAGE_SIZE / 4]; +}; +CTASSERT(sizeof(struct apic_page) == PAGE_SIZE); + +/* Posted Interrupt Descriptor (described in section 29.6 of the Intel SDM) */ +struct pir_desc { + uint64_t pir[4]; + uint64_t pending; + uint64_t unused[3]; +} __aligned(64); +CTASSERT(sizeof(struct pir_desc) == 64); + /* virtual machine softc */ struct vmx { struct vmcs vmcs[VM_MAXCPU]; /* one vmcs per virtual cpu */ + struct apic_page apic_page[VM_MAXCPU]; /* one apic page per vcpu */ char msr_bitmap[PAGE_SIZE]; + struct pir_desc pir_desc[VM_MAXCPU]; struct msr_entry guest_msrs[VM_MAXCPU][GUEST_MSR_MAX_ENTRIES]; struct vmxctx ctx[VM_MAXCPU]; struct vmxcap cap[VM_MAXCPU]; struct vmxstate state[VM_MAXCPU]; uint64_t eptp; struct vm *vm; + long eptgen[MAXCPU]; /* cached pmap->pm_eptgen */ }; CTASSERT((offsetof(struct vmx, vmcs) & PAGE_MASK) == 0); CTASSERT((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0); CTASSERT((offsetof(struct vmx, guest_msrs) & 15) == 0); +CTASSERT((offsetof(struct vmx, pir_desc[0]) & 63) == 0); #define VMX_GUEST_VMEXIT 0 #define VMX_VMRESUME_ERROR 1 #define VMX_VMLAUNCH_ERROR 2 #define VMX_INVEPT_ERROR 3 -int vmx_enter_guest(struct vmxctx *ctx, int launched); +int vmx_enter_guest(struct vmxctx *ctx, struct vmx *vmx, int launched); void vmx_exit_guest(void); +void vmx_call_isr(uintptr_t entry); u_long vmx_fix_cr0(u_long cr0); u_long vmx_fix_cr4(u_long cr4); diff --git a/sys/amd64/vmm/intel/vmx_controls.h b/sys/amd64/vmm/intel/vmx_controls.h index 3cd2eff..2b117ae 100644 --- a/sys/amd64/vmm/intel/vmx_controls.h +++ b/sys/amd64/vmm/intel/vmx_controls.h @@ -34,6 +34,7 @@ #define PINBASED_NMI_EXITING (1 << 3) #define PINBASED_VIRTUAL_NMI (1 << 5) #define PINBASED_PREMPTION_TIMER (1 << 6) +#define PINBASED_POSTED_INTERRUPT (1 << 7) /* Primary Processor-Based VM-Execution Controls */ #define PROCBASED_INT_WINDOW_EXITING (1 << 2) @@ -59,16 +60,18 @@ #define PROCBASED_SECONDARY_CONTROLS (1U << 31) /* Secondary Processor-Based VM-Execution Controls */ -#define PROCBASED2_VIRTUALIZE_APIC (1 << 0) -#define PROCBASED2_ENABLE_EPT (1 << 1) -#define PROCBASED2_DESC_TABLE_EXITING (1 << 2) -#define PROCBASED2_ENABLE_RDTSCP (1 << 3) -#define PROCBASED2_VIRTUALIZE_X2APIC (1 << 4) -#define PROCBASED2_ENABLE_VPID (1 << 5) -#define PROCBASED2_WBINVD_EXITING (1 << 6) -#define PROCBASED2_UNRESTRICTED_GUEST (1 << 7) -#define PROCBASED2_PAUSE_LOOP_EXITING (1 << 10) -#define PROCBASED2_ENABLE_INVPCID (1 << 12) +#define PROCBASED2_VIRTUALIZE_APIC_ACCESSES (1 << 0) +#define PROCBASED2_ENABLE_EPT (1 << 1) +#define PROCBASED2_DESC_TABLE_EXITING (1 << 2) +#define PROCBASED2_ENABLE_RDTSCP (1 << 3) +#define PROCBASED2_VIRTUALIZE_X2APIC_MODE (1 << 4) +#define PROCBASED2_ENABLE_VPID (1 << 5) +#define PROCBASED2_WBINVD_EXITING (1 << 6) +#define PROCBASED2_UNRESTRICTED_GUEST (1 << 7) +#define PROCBASED2_APIC_REGISTER_VIRTUALIZATION (1 << 8) +#define PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY (1 << 9) +#define PROCBASED2_PAUSE_LOOP_EXITING (1 << 10) +#define PROCBASED2_ENABLE_INVPCID (1 << 12) /* VM Exit Controls */ #define VM_EXIT_SAVE_DEBUG_CONTROLS (1 << 2) diff --git a/sys/amd64/vmm/intel/vmx_genassym.c b/sys/amd64/vmm/intel/vmx_genassym.c index bf463dc..5c91fec 100644 --- a/sys/amd64/vmm/intel/vmx_genassym.c +++ b/sys/amd64/vmm/intel/vmx_genassym.c @@ -68,10 +68,10 @@ ASSYM(VMXCTX_HOST_RBX, offsetof(struct vmxctx, host_rbx)); ASSYM(VMXCTX_HOST_RIP, offsetof(struct vmxctx, host_rip)); 
ASSYM(VMXCTX_INST_FAIL_STATUS, offsetof(struct vmxctx, inst_fail_status)); -ASSYM(VMXCTX_EPTGEN, offsetof(struct vmxctx, eptgen)); - ASSYM(VMXCTX_PMAP, offsetof(struct vmxctx, pmap)); -ASSYM(VMXCTX_EPTP, offsetof(struct vmxctx, eptp)); + +ASSYM(VMX_EPTGEN, offsetof(struct vmx, eptgen)); +ASSYM(VMX_EPTP, offsetof(struct vmx, eptp)); ASSYM(VM_FAIL_INVALID, VM_FAIL_INVALID); ASSYM(VM_FAIL_VALID, VM_FAIL_VALID); @@ -84,3 +84,6 @@ ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid)); ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); ASSYM(PM_EPTGEN, offsetof(struct pmap, pm_eptgen)); + +ASSYM(KERNEL_SS, GSEL(GDATA_SEL, SEL_KPL)); +ASSYM(KERNEL_CS, GSEL(GCODE_SEL, SEL_KPL)); diff --git a/sys/amd64/vmm/intel/vmx_support.S b/sys/amd64/vmm/intel/vmx_support.S index d616984..9e8cf2d 100644 --- a/sys/amd64/vmm/intel/vmx_support.S +++ b/sys/amd64/vmm/intel/vmx_support.S @@ -97,7 +97,8 @@ /* * vmx_enter_guest(struct vmxctx *vmxctx, int launched) * %rdi: pointer to the 'vmxctx' - * %esi: launch state of the VMCS + * %rsi: pointer to the 'vmx' + * %edx: launch state of the VMCS * Interrupts must be disabled on entry. */ ENTRY(vmx_enter_guest) @@ -114,19 +115,19 @@ ENTRY(vmx_enter_guest) LK btsl %eax, PM_ACTIVE(%r11) /* - * If 'vmxctx->eptgen[curcpu]' is not identical to 'pmap->pm_eptgen' + * If 'vmx->eptgen[curcpu]' is not identical to 'pmap->pm_eptgen' * then we must invalidate all mappings associated with this EPTP. */ movq PM_EPTGEN(%r11), %r10 - cmpq %r10, VMXCTX_EPTGEN(%rdi, %rax, 8) + cmpq %r10, VMX_EPTGEN(%rsi, %rax, 8) je guest_restore - /* Refresh 'vmxctx->eptgen[curcpu]' */ - movq %r10, VMXCTX_EPTGEN(%rdi, %rax, 8) + /* Refresh 'vmx->eptgen[curcpu]' */ + movq %r10, VMX_EPTGEN(%rsi, %rax, 8) /* Setup the invept descriptor on the host stack */ mov %rsp, %r11 - movq VMXCTX_EPTP(%rdi), %rax + movq VMX_EPTP(%rsi), %rax movq %rax, -16(%r11) movq $0x0, -8(%r11) mov $0x1, %eax /* Single context invalidate */ @@ -134,7 +135,7 @@ ENTRY(vmx_enter_guest) jbe invept_error /* Check invept instruction error */ guest_restore: - cmpl $0, %esi + cmpl $0, %edx je do_launch VMX_GUEST_RESTORE @@ -234,3 +235,21 @@ ENTRY(vmx_exit_guest) movl $VMX_GUEST_VMEXIT, %eax ret END(vmx_exit_guest) + +/* + * %rdi = interrupt handler entry point + * + * Calling sequence described in the "Instruction Set Reference" for the "INT" + * instruction in Intel SDM, Vol 2. + */ +ENTRY(vmx_call_isr) + mov %rsp, %r11 /* save %rsp */ + and $~0xf, %rsp /* align on 16-byte boundary */ + pushq $KERNEL_SS /* %ss */ + pushq %r11 /* %rsp */ + pushfq /* %rflags */ + pushq $KERNEL_CS /* %cs */ + cli /* disable interrupts */ + callq *%rdi /* push %rip and call isr */ + ret +END(vmx_call_isr) diff --git a/sys/amd64/vmm/io/vioapic.c b/sys/amd64/vmm/io/vioapic.c index 151065a..703e479 100644 --- a/sys/amd64/vmm/io/vioapic.c +++ b/sys/amd64/vmm/io/vioapic.c @@ -222,8 +222,52 @@ vioapic_pulse_irq(struct vm *vm, int irq) return (vioapic_set_irqstate(vm, irq, IRQSTATE_PULSE)); } +/* + * Reset the vlapic's trigger-mode register to reflect the ioapic pin + * configuration. + */ +static void +vioapic_update_tmr(struct vm *vm, int vcpuid, void *arg) +{ + struct vioapic *vioapic; + struct vlapic *vlapic; + uint32_t low, high, dest; + int delmode, pin, vector; + bool level, phys; + + vlapic = vm_lapic(vm, vcpuid); + vioapic = vm_ioapic(vm); + + VIOAPIC_LOCK(vioapic); + /* + * Reset all vectors to be edge-triggered. 
+ */ + vlapic_reset_tmr(vlapic); + for (pin = 0; pin < REDIR_ENTRIES; pin++) { + low = vioapic->rtbl[pin].reg; + high = vioapic->rtbl[pin].reg >> 32; + + level = low & IOART_TRGRLVL ? true : false; + if (!level) + continue; + + /* + * For a level-triggered 'pin' let the vlapic figure out if + * an assertion on this 'pin' would result in an interrupt + * being delivered to it. If yes, then it will modify the + * TMR bit associated with this vector to level-triggered. + */ + phys = ((low & IOART_DESTMOD) == IOART_DESTPHY); + delmode = low & IOART_DELMOD; + vector = low & IOART_INTVEC; + dest = high >> APIC_ID_SHIFT; + vlapic_set_tmr_level(vlapic, dest, phys, delmode, vector); + } + VIOAPIC_UNLOCK(vioapic); +} + static uint32_t -vioapic_read(struct vioapic *vioapic, uint32_t addr) +vioapic_read(struct vioapic *vioapic, int vcpuid, uint32_t addr) { int regnum, pin, rshift; @@ -258,10 +302,12 @@ vioapic_read(struct vioapic *vioapic, uint32_t addr) } static void -vioapic_write(struct vioapic *vioapic, uint32_t addr, uint32_t data) +vioapic_write(struct vioapic *vioapic, int vcpuid, uint32_t addr, uint32_t data) { uint64_t data64, mask64; + uint64_t last, changed; int regnum, pin, lshift; + cpuset_t allvcpus; regnum = addr & 0xff; switch (regnum) { @@ -285,6 +331,8 @@ vioapic_write(struct vioapic *vioapic, uint32_t addr, uint32_t data) else lshift = 0; + last = vioapic->rtbl[pin].reg; + data64 = (uint64_t)data << lshift; mask64 = (uint64_t)0xffffffff << lshift; vioapic->rtbl[pin].reg &= ~mask64 | RTBL_RO_BITS; @@ -294,6 +342,22 @@ vioapic_write(struct vioapic *vioapic, uint32_t addr, uint32_t data) pin, vioapic->rtbl[pin].reg); /* + * If any fields in the redirection table entry (except mask + * or polarity) have changed then rendezvous all the vcpus + * to update their vlapic trigger-mode registers. 
+ */ + changed = last ^ vioapic->rtbl[pin].reg; + if (changed & ~(IOART_INTMASK | IOART_INTPOL)) { + VIOAPIC_CTR1(vioapic, "ioapic pin%d: recalculate " + "vlapic trigger-mode register", pin); + VIOAPIC_UNLOCK(vioapic); + allvcpus = vm_active_cpus(vioapic->vm); + vm_smp_rendezvous(vioapic->vm, vcpuid, allvcpus, + vioapic_update_tmr, NULL); + VIOAPIC_LOCK(vioapic); + } + + /* * Generate an interrupt if the following conditions are met: * - pin is not masked * - previous interrupt has been EOIed @@ -310,8 +374,8 @@ vioapic_write(struct vioapic *vioapic, uint32_t addr, uint32_t data) } static int -vioapic_mmio_rw(struct vioapic *vioapic, uint64_t gpa, uint64_t *data, - int size, bool doread) +vioapic_mmio_rw(struct vioapic *vioapic, int vcpuid, uint64_t gpa, + uint64_t *data, int size, bool doread) { uint64_t offset; @@ -334,10 +398,13 @@ vioapic_mmio_rw(struct vioapic *vioapic, uint64_t gpa, uint64_t *data, else vioapic->ioregsel = *data; } else { - if (doread) - *data = vioapic_read(vioapic, vioapic->ioregsel); - else - vioapic_write(vioapic, vioapic->ioregsel, *data); + if (doread) { + *data = vioapic_read(vioapic, vcpuid, + vioapic->ioregsel); + } else { + vioapic_write(vioapic, vcpuid, vioapic->ioregsel, + *data); + } } VIOAPIC_UNLOCK(vioapic); @@ -352,7 +419,7 @@ vioapic_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, struct vioapic *vioapic; vioapic = vm_ioapic(vm); - error = vioapic_mmio_rw(vioapic, gpa, rval, size, true); + error = vioapic_mmio_rw(vioapic, vcpuid, gpa, rval, size, true); return (error); } @@ -364,7 +431,7 @@ vioapic_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t wval, struct vioapic *vioapic; vioapic = vm_ioapic(vm); - error = vioapic_mmio_rw(vioapic, gpa, &wval, size, false); + error = vioapic_mmio_rw(vioapic, vcpuid, gpa, &wval, size, false); return (error); } diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c index 695040d..2395247 100644 --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -37,108 +37,34 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/smp.h> -#include <machine/clock.h> #include <x86/specialreg.h> #include <x86/apicreg.h> +#include <machine/clock.h> +#include <machine/smp.h> + #include <machine/vmm.h> -#include "vmm_stat.h" +#include "vmm_ipi.h" #include "vmm_lapic.h" #include "vmm_ktr.h" +#include "vmm_stat.h" + #include "vlapic.h" +#include "vlapic_priv.h" #include "vioapic.h" -#define VLAPIC_CTR0(vlapic, format) \ - VCPU_CTR0((vlapic)->vm, (vlapic)->vcpuid, format) - -#define VLAPIC_CTR1(vlapic, format, p1) \ - VCPU_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1) - -#define VLAPIC_CTR2(vlapic, format, p1, p2) \ - VCPU_CTR2((vlapic)->vm, (vlapic)->vcpuid, format, p1, p2) - -#define VLAPIC_CTR_IRR(vlapic, msg) \ -do { \ - uint32_t *irrptr = &(vlapic)->apic.irr0; \ - irrptr[0] = irrptr[0]; /* silence compiler */ \ - VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]); \ - VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]); \ - VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]); \ - VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]); \ - VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]); \ - VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]); \ - VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]); \ - VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]); \ -} while (0) - -#define VLAPIC_CTR_ISR(vlapic, msg) \ -do { \ - uint32_t *isrptr = &(vlapic)->apic.isr0; \ - isrptr[0] = isrptr[0]; /* silence compiler */ \ - 
VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]); \ - VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]); \ - VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]); \ - VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]); \ - VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]); \ - VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]); \ - VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]); \ - VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]); \ -} while (0) - -static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic"); - #define PRIO(x) ((x) >> 4) #define VLAPIC_VERSION (16) -#define VLAPIC_MAXLVT_ENTRIES (APIC_LVT_CMCI) #define x2apic(vlapic) (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0) -enum boot_state { - BS_INIT, - BS_SIPI, - BS_RUNNING -}; - -struct vlapic { - struct vm *vm; - int vcpuid; - - struct LAPIC apic; - - uint32_t esr_pending; - int esr_firing; - - struct callout callout; /* vlapic timer */ - struct bintime timer_fire_bt; /* callout expiry time */ - struct bintime timer_freq_bt; /* timer frequency */ - struct bintime timer_period_bt; /* timer period */ - struct mtx timer_mtx; - - /* - * The 'isrvec_stk' is a stack of vectors injected by the local apic. - * A vector is popped from the stack when the processor does an EOI. - * The vector on the top of the stack is used to compute the - * Processor Priority in conjunction with the TPR. - */ - uint8_t isrvec_stk[ISRVEC_STK_SIZE]; - int isrvec_stk_top; - - uint64_t msr_apicbase; - enum boot_state boot_state; -}; - /* * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the - * vlapic_callout_handler() and vcpu accesses to the following registers: - * - initial count register aka icr_timer - * - current count register aka ccr_timer - * - divide config register aka dcr_timer + * vlapic_callout_handler() and vcpu accesses to: + * - timer_freq_bt, timer_period_bt, timer_fire_bt * - timer LVT register - * - * Note that the vlapic_callout_handler() does not write to any of these - * registers so they can be safely read from the vcpu context without locking. 
*/ #define VLAPIC_TIMER_LOCK(vlapic) mtx_lock_spin(&((vlapic)->timer_mtx)) #define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock_spin(&((vlapic)->timer_mtx)) @@ -156,72 +82,71 @@ vlapic_get_id(struct vlapic *vlapic) return (vlapic->vcpuid << 24); } -static __inline uint32_t -vlapic_get_ldr(struct vlapic *vlapic) +static uint32_t +x2apic_ldr(struct vlapic *vlapic) { - struct LAPIC *lapic; int apicid; uint32_t ldr; - lapic = &vlapic->apic; - if (x2apic(vlapic)) { - apicid = vlapic_get_id(vlapic); - ldr = 1 << (apicid & 0xf); - ldr |= (apicid & 0xffff0) << 12; - return (ldr); - } else - return (lapic->ldr); + apicid = vlapic_get_id(vlapic); + ldr = 1 << (apicid & 0xf); + ldr |= (apicid & 0xffff0) << 12; + return (ldr); } -static __inline uint32_t -vlapic_get_dfr(struct vlapic *vlapic) +void +vlapic_dfr_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; - lapic = &vlapic->apic; - if (x2apic(vlapic)) - return (0); - else - return (lapic->dfr); -} - -static void -vlapic_set_dfr(struct vlapic *vlapic, uint32_t data) -{ - uint32_t dfr; - struct LAPIC *lapic; - + lapic = vlapic->apic_page; if (x2apic(vlapic)) { - VM_CTR1(vlapic->vm, "write to DFR in x2apic mode: %#x", data); + VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x", + lapic->dfr); + lapic->dfr = 0; return; } - lapic = &vlapic->apic; - dfr = (lapic->dfr & APIC_DFR_RESERVED) | (data & APIC_DFR_MODEL_MASK); - if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) + lapic->dfr &= APIC_DFR_MODEL_MASK; + lapic->dfr |= APIC_DFR_RESERVED; + + if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model"); - else if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) + else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model"); else - VLAPIC_CTR1(vlapic, "vlapic DFR in Unknown Model %#x", dfr); - - lapic->dfr = dfr; + VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr); } -static void -vlapic_set_ldr(struct vlapic *vlapic, uint32_t data) +void +vlapic_ldr_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; + lapic = vlapic->apic_page; + /* LDR is read-only in x2apic mode */ if (x2apic(vlapic)) { - VLAPIC_CTR1(vlapic, "write to LDR in x2apic mode: %#x", data); - return; + VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x", + lapic->ldr); + lapic->ldr = x2apic_ldr(vlapic); + } else { + lapic->ldr &= ~APIC_LDR_RESERVED; + VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr); } +} - lapic = &vlapic->apic; - lapic->ldr = data & ~APIC_LDR_RESERVED; - VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr); +void +vlapic_id_write_handler(struct vlapic *vlapic) +{ + struct LAPIC *lapic; + + /* + * We don't allow the ID register to be modified so reset it back to + * its default value. 
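
(A quick worked example of the x2apic_ldr() computation above; the function below is an illustrative standalone sketch, not part of this commit, and the apicid value is hypothetical.)

#include <stdint.h>

static uint32_t
x2apic_ldr_sketch(int apicid)
{
	uint32_t ldr;

	ldr = 1 << (apicid & 0xf);		/* apicid 0x25: 1 << 5 = 0x00020 */
	ldr |= (apicid & 0xffff0) << 12;	/* 0x20 << 12 = 0x20000 */
	return (ldr);				/* apicid 0x25 -> ldr 0x20020 */
}
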
+ */ + lapic = vlapic->apic_page; + lapic->id = vlapic_get_id(vlapic); } static int @@ -249,16 +174,6 @@ vlapic_timer_divisor(uint32_t dcr) } } -static void -vlapic_mask_lvts(uint32_t *lvts, int num_lvt) -{ - int i; - for (i = 0; i < num_lvt; i++) { - *lvts |= APIC_LVT_M; - lvts += 4; - } -} - #if 0 static inline void vlapic_dump_lvt(uint32_t offset, uint32_t *lvt) @@ -277,7 +192,7 @@ vlapic_get_ccr(struct vlapic *vlapic) uint32_t ccr; ccr = 0; - lapic = &vlapic->apic; + lapic = vlapic->apic_page; VLAPIC_TIMER_LOCK(vlapic); if (callout_active(&vlapic->callout)) { @@ -301,18 +216,18 @@ vlapic_get_ccr(struct vlapic *vlapic) return (ccr); } -static void -vlapic_set_dcr(struct vlapic *vlapic, uint32_t dcr) +void +vlapic_dcr_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; int divisor; - lapic = &vlapic->apic; + lapic = vlapic->apic_page; VLAPIC_TIMER_LOCK(vlapic); - lapic->dcr_timer = dcr; - divisor = vlapic_timer_divisor(dcr); - VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", dcr, divisor); + divisor = vlapic_timer_divisor(lapic->dcr_timer); + VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", + lapic->dcr_timer, divisor); /* * Update the timer frequency and the timer period. @@ -327,57 +242,42 @@ vlapic_set_dcr(struct vlapic *vlapic, uint32_t dcr) VLAPIC_TIMER_UNLOCK(vlapic); } -static void -vlapic_update_errors(struct vlapic *vlapic) -{ - struct LAPIC *lapic = &vlapic->apic; - lapic->esr = vlapic->esr_pending; - vlapic->esr_pending = 0; -} - -static void -vlapic_reset(struct vlapic *vlapic) +void +vlapic_esr_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; - lapic = &vlapic->apic; - bzero(lapic, sizeof(struct LAPIC)); - - lapic->version = VLAPIC_VERSION; - lapic->version |= (VLAPIC_MAXLVT_ENTRIES << MAXLVTSHIFT); - lapic->dfr = 0xffffffff; - lapic->svr = APIC_SVR_VECTOR; - vlapic_mask_lvts(&lapic->lvt_timer, 6); - vlapic_mask_lvts(&lapic->lvt_cmci, 1); - vlapic_set_dcr(vlapic, 0); - - if (vlapic->vcpuid == 0) - vlapic->boot_state = BS_RUNNING; /* BSP */ - else - vlapic->boot_state = BS_INIT; /* AP */ + lapic = vlapic->apic_page; + lapic->esr = vlapic->esr_pending; + vlapic->esr_pending = 0; } -void +int vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level) { - struct LAPIC *lapic = &vlapic->apic; - uint32_t *irrptr, *tmrptr, mask; - int idx; + struct LAPIC *lapic; + uint32_t *irrptr, *tmrptr, mask; + int idx; - if (vector < 0 || vector >= 256) - panic("vlapic_set_intr_ready: invalid vector %d\n", vector); + KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector)); + lapic = vlapic->apic_page; if (!(lapic->svr & APIC_SVR_ENABLE)) { VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring " "interrupt %d", vector); - return; + return (0); } if (vector < 16) { vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR); - return; + VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d", + vector); + return (1); } - + + if (vlapic->ops.set_intr_ready) + return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level)); + idx = (vector / 32) * 4; mask = 1 << (vector % 32); @@ -385,23 +285,22 @@ vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level) atomic_set_int(&irrptr[idx], mask); /* - * Upon acceptance of an interrupt into the IRR the corresponding - * TMR bit is cleared for edge-triggered interrupts and set for - * level-triggered interrupts. + * Verify that the trigger-mode of the interrupt matches with + * the vlapic TMR registers. 
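
(An aside on the indexing scheme used by vlapic_set_intr_ready() and the other IRR/TMR walkers in this file: the 256 vectors are spread across eight 32-bit registers spaced 16 bytes apart, hence the stride of 4 uint32_t slots. The helper below is an illustrative sketch, not code from the commit.)

#include <stdint.h>

static void
vec_to_reg_sketch(int vector, int *idx, uint32_t *mask)
{
	*idx = (vector / 32) * 4;	/* vector 33 -> irr1/tmr1, idx 4 */
	*mask = 1u << (vector % 32);	/* vector 33 -> bit 1, mask 0x2 */
}
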
*/ tmrptr = &lapic->tmr0; - if (level) - atomic_set_int(&tmrptr[idx], mask); - else - atomic_clear_int(&tmrptr[idx], mask); + KASSERT((tmrptr[idx] & mask) == (level ? mask : 0), + ("vlapic TMR[%d] is 0x%08x but interrupt is %s-triggered", + idx / 4, tmrptr[idx], level ? "level" : "edge")); VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready"); + return (1); } static __inline uint32_t * vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) { - struct LAPIC *lapic = &vlapic->apic; + struct LAPIC *lapic = vlapic->apic_page; int i; switch (offset) { @@ -415,24 +314,65 @@ vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) } } +static __inline int +lvt_off_to_idx(uint32_t offset) +{ + int index; + + switch (offset) { + case APIC_OFFSET_CMCI_LVT: + index = APIC_LVT_CMCI; + break; + case APIC_OFFSET_TIMER_LVT: + index = APIC_LVT_TIMER; + break; + case APIC_OFFSET_THERM_LVT: + index = APIC_LVT_THERMAL; + break; + case APIC_OFFSET_PERF_LVT: + index = APIC_LVT_PMC; + break; + case APIC_OFFSET_LINT0_LVT: + index = APIC_LVT_LINT0; + break; + case APIC_OFFSET_LINT1_LVT: + index = APIC_LVT_LINT1; + break; + case APIC_OFFSET_ERROR_LVT: + index = APIC_LVT_ERROR; + break; + default: + index = -1; + break; + } + KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: " + "invalid lvt index %d for offset %#x", index, offset)); + + return (index); +} + static __inline uint32_t vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset) { + int idx; + uint32_t val; - return (*vlapic_get_lvtptr(vlapic, offset)); + idx = lvt_off_to_idx(offset); + val = atomic_load_acq_32(&vlapic->lvt_last[idx]); + return (val); } -static void -vlapic_set_lvt(struct vlapic *vlapic, uint32_t offset, uint32_t val) +void +vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset) { - uint32_t *lvtptr, mask; + uint32_t *lvtptr, mask, val; struct LAPIC *lapic; + int idx; - lapic = &vlapic->apic; + lapic = vlapic->apic_page; lvtptr = vlapic_get_lvtptr(vlapic, offset); - - if (offset == APIC_OFFSET_TIMER_LVT) - VLAPIC_TIMER_LOCK(vlapic); + val = *lvtptr; + idx = lvt_off_to_idx(offset); if (!(lapic->svr & APIC_SVR_ENABLE)) val |= APIC_LVT_M; @@ -451,10 +391,36 @@ vlapic_set_lvt(struct vlapic *vlapic, uint32_t offset, uint32_t val) mask |= APIC_LVT_DM; break; } - *lvtptr = val & mask; + val &= mask; + *lvtptr = val; + atomic_store_rel_32(&vlapic->lvt_last[idx], val); +} + +static void +vlapic_mask_lvts(struct vlapic *vlapic) +{ + struct LAPIC *lapic = vlapic->apic_page; + + lapic->lvt_cmci |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT); + + lapic->lvt_timer |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT); - if (offset == APIC_OFFSET_TIMER_LVT) - VLAPIC_TIMER_UNLOCK(vlapic); + lapic->lvt_thermal |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT); + + lapic->lvt_pcint |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT); + + lapic->lvt_lint0 |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT); + + lapic->lvt_lint1 |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT); + + lapic->lvt_error |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT); } static int @@ -474,8 +440,8 @@ vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt) vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR); return (0); } - vlapic_set_intr_ready(vlapic, vec, false); - vcpu_notify_event(vlapic->vm, vlapic->vcpuid); + if (vlapic_set_intr_ready(vlapic, vec, false)) + vcpu_notify_event(vlapic->vm, 
vlapic->vcpuid, true); break; case APIC_LVT_DM_NMI: vm_inject_nmi(vlapic->vm, vlapic->vcpuid); @@ -494,7 +460,7 @@ dump_isrvec_stk(struct vlapic *vlapic) int i; uint32_t *isrptr; - isrptr = &vlapic->apic.isr0; + isrptr = &vlapic->apic_page->isr0; for (i = 0; i < 8; i++) printf("ISR%d 0x%08x\n", i, isrptr[i * 4]); @@ -519,7 +485,7 @@ vlapic_update_ppr(struct vlapic *vlapic) * bits is set in the ISRx registers. */ isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top]; - tpr = vlapic->apic.tpr; + tpr = vlapic->apic_page->tpr; #if 1 { @@ -548,7 +514,7 @@ vlapic_update_ppr(struct vlapic *vlapic) * corresponding entry on the isrvec stack. */ i = 1; - isrptr = &vlapic->apic.isr0; + isrptr = &vlapic->apic_page->isr0; for (vector = 0; vector < 256; vector++) { idx = (vector / 32) * 4; if (isrptr[idx] & (1 << (vector % 32))) { @@ -568,14 +534,14 @@ vlapic_update_ppr(struct vlapic *vlapic) else ppr = isrvec & 0xf0; - vlapic->apic.ppr = ppr; + vlapic->apic_page->ppr = ppr; VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr); } static void vlapic_process_eoi(struct vlapic *vlapic) { - struct LAPIC *lapic = &vlapic->apic; + struct LAPIC *lapic = vlapic->apic_page; uint32_t *isrptr, *tmrptr; int i, idx, bitpos, vector; @@ -675,7 +641,7 @@ vlapic_fire_cmci(struct vlapic *vlapic) } } -static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_ENTRIES, +static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1, "lvts triggered"); int @@ -735,8 +701,6 @@ vlapic_callout_handler(void *arg) callout_deactivate(&vlapic->callout); - KASSERT(vlapic->apic.icr_timer != 0, ("vlapic timer is disabled")); - vlapic_fire_timer(vlapic); if (vlapic_periodic_timer(vlapic)) { @@ -781,16 +745,17 @@ done: VLAPIC_TIMER_UNLOCK(vlapic); } -static void -vlapic_set_icr_timer(struct vlapic *vlapic, uint32_t icr_timer) +void +vlapic_icrtmr_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; sbintime_t sbt; + uint32_t icr_timer; VLAPIC_TIMER_LOCK(vlapic); - lapic = &vlapic->apic; - lapic->icr_timer = icr_timer; + lapic = vlapic->apic_page; + icr_timer = lapic->icr_timer; vlapic->timer_period_bt = vlapic->timer_freq_bt; bintime_mul(&vlapic->timer_period_bt, icr_timer); @@ -872,8 +837,8 @@ vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, CPU_CLR(vcpuid, &amask); vlapic = vm_lapic(vm, vcpuid); - dfr = vlapic_get_dfr(vlapic); - ldr = vlapic_get_ldr(vlapic); + dfr = vlapic->apic_page->dfr; + ldr = vlapic->apic_page->ldr; if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) { @@ -912,16 +877,22 @@ vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu"); -static int -lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu) +int +vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) { int i; bool phys; cpuset_t dmask; + uint64_t icrval; uint32_t dest, vec, mode; struct vlapic *vlapic2; struct vm_exit *vmexit; - + struct LAPIC *lapic; + + lapic = vlapic->apic_page; + lapic->icr_lo &= ~APIC_DELSTAT_PEND; + icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo; + if (x2apic(vlapic)) dest = icrval >> 32; else @@ -931,9 +902,12 @@ lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu) if (mode == APIC_DELMODE_FIXED && vec < 16) { vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR); + VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec); return (0); } - + + VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec); + if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) { switch 
(icrval & APIC_DEST_MASK) { case APIC_DEST_DESTFLD: @@ -963,8 +937,13 @@ lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu) lapic_intr_edge(vlapic->vm, i, vec); vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT, i, 1); - } else + VLAPIC_CTR2(vlapic, "vlapic sending ipi %d " + "to vcpuid %d", vec, i); + } else { vm_inject_nmi(vlapic->vm, i); + VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi " + "to vcpuid %d", i); + } } return (0); /* handled completely in the kernel */ @@ -1019,12 +998,15 @@ lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu) } int -vlapic_pending_intr(struct vlapic *vlapic) +vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) { - struct LAPIC *lapic = &vlapic->apic; + struct LAPIC *lapic = vlapic->apic_page; int idx, i, bitpos, vector; uint32_t *irrptr, val; + if (vlapic->ops.pending_intr) + return ((*vlapic->ops.pending_intr)(vlapic, vecptr)); + irrptr = &lapic->irr0; /* @@ -1039,21 +1021,26 @@ vlapic_pending_intr(struct vlapic *vlapic) vector = i * 32 + (bitpos - 1); if (PRIO(vector) > PRIO(lapic->ppr)) { VLAPIC_CTR1(vlapic, "pending intr %d", vector); - return (vector); + if (vecptr != NULL) + *vecptr = vector; + return (1); } else break; } } - return (-1); + return (0); } void vlapic_intr_accepted(struct vlapic *vlapic, int vector) { - struct LAPIC *lapic = &vlapic->apic; + struct LAPIC *lapic = vlapic->apic_page; uint32_t *irrptr, *isrptr; int idx, stk_top; + if (vlapic->ops.intr_accepted) + return ((*vlapic->ops.intr_accepted)(vlapic, vector)); + /* * clear the ready bit for vector being accepted in irr * and set the vector as in service in isr. @@ -1081,24 +1068,30 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector) vlapic_update_ppr(vlapic); } -static void -lapic_set_svr(struct vlapic *vlapic, uint32_t new) +void +vlapic_svr_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; - uint32_t old, changed; + uint32_t old, new, changed; + + lapic = vlapic->apic_page; + + new = lapic->svr; + old = vlapic->svr_last; + vlapic->svr_last = new; - lapic = &vlapic->apic; - old = lapic->svr; changed = old ^ new; if ((changed & APIC_SVR_ENABLE) != 0) { if ((new & APIC_SVR_ENABLE) == 0) { /* - * The apic is now disabled so stop the apic timer. + * The apic is now disabled so stop the apic timer + * and mask all the LVT entries. 
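
(The PRIO() test in vlapic_pending_intr() above is easiest to see with numbers; the helper and the values in the comment are illustrative only.)

#include <stdint.h>

#define PRIO(x)	((x) >> 4)	/* 16 priority classes, 16 vectors each */

static int
deliverable_sketch(int vector, uint8_t ppr)
{
	/*
	 * Vector 0x45 is in class 4: deliverable against PPR 0x3f
	 * (class 3) but blocked against PPR 0x4f (class 4), because
	 * delivery requires a strictly higher class than the PPR.
	 */
	return (PRIO(vector) > PRIO(ppr));
}
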
*/ VLAPIC_CTR0(vlapic, "vlapic is software-disabled"); VLAPIC_TIMER_LOCK(vlapic); callout_stop(&vlapic->callout); VLAPIC_TIMER_UNLOCK(vlapic); + vlapic_mask_lvts(vlapic); } else { /* * The apic is now enabled so restart the apic timer @@ -1106,16 +1099,15 @@ lapic_set_svr(struct vlapic *vlapic, uint32_t new) */ VLAPIC_CTR0(vlapic, "vlapic is software-enabled"); if (vlapic_periodic_timer(vlapic)) - vlapic_set_icr_timer(vlapic, lapic->icr_timer); + vlapic_icrtmr_write_handler(vlapic); } } - lapic->svr = new; } int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu) { - struct LAPIC *lapic = &vlapic->apic; + struct LAPIC *lapic = vlapic->apic_page; uint32_t *reg; int i; @@ -1128,7 +1120,7 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu) switch(offset) { case APIC_OFFSET_ID: - *data = vlapic_get_id(vlapic); + *data = lapic->id; break; case APIC_OFFSET_VER: *data = lapic->version; @@ -1146,10 +1138,10 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu) *data = lapic->eoi; break; case APIC_OFFSET_LDR: - *data = vlapic_get_ldr(vlapic); + *data = lapic->ldr; break; case APIC_OFFSET_DFR: - *data = vlapic_get_dfr(vlapic); + *data = lapic->dfr; break; case APIC_OFFSET_SVR: *data = lapic->svr; @@ -1174,6 +1166,8 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu) break; case APIC_OFFSET_ICR_LOW: *data = lapic->icr_lo; + if (x2apic(vlapic)) + *data |= (uint64_t)lapic->icr_hi << 32; break; case APIC_OFFSET_ICR_HI: *data = lapic->icr_hi; @@ -1181,14 +1175,19 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu) case APIC_OFFSET_CMCI_LVT: case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: *data = vlapic_get_lvt(vlapic, offset); +#ifdef INVARIANTS + reg = vlapic_get_lvtptr(vlapic, offset); + KASSERT(*data == *reg, ("inconsistent lvt value at " + "offset %#lx: %#lx/%#x", offset, *data, *reg)); +#endif break; - case APIC_OFFSET_ICR: + case APIC_OFFSET_TIMER_ICR: *data = lapic->icr_timer; break; - case APIC_OFFSET_CCR: + case APIC_OFFSET_TIMER_CCR: *data = vlapic_get_ccr(vlapic); break; - case APIC_OFFSET_DCR: + case APIC_OFFSET_TIMER_DCR: *data = lapic->dcr_timer; break; case APIC_OFFSET_RRR: @@ -1204,9 +1203,13 @@ done: int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu) { - struct LAPIC *lapic = &vlapic->apic; + struct LAPIC *lapic = vlapic->apic_page; + uint32_t *regptr; int retval; + KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE, + ("vlapic_write: invalid offset %#lx", offset)); + VLAPIC_CTR2(vlapic, "vlapic write offset %#x, data %#lx", offset, data); if (offset > sizeof(*lapic)) { @@ -1214,10 +1217,11 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu) } retval = 0; - offset &= ~3; switch(offset) { case APIC_OFFSET_ID: + lapic->id = data; + vlapic_id_write_handler(vlapic); break; case APIC_OFFSET_TPR: lapic->tpr = data & 0xff; @@ -1227,41 +1231,44 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu) vlapic_process_eoi(vlapic); break; case APIC_OFFSET_LDR: - vlapic_set_ldr(vlapic, data); + lapic->ldr = data; + vlapic_ldr_write_handler(vlapic); break; case APIC_OFFSET_DFR: - vlapic_set_dfr(vlapic, data); + lapic->dfr = data; + vlapic_dfr_write_handler(vlapic); break; case APIC_OFFSET_SVR: - lapic_set_svr(vlapic, data); + lapic->svr = data; + vlapic_svr_write_handler(vlapic); break; case APIC_OFFSET_ICR_LOW: - if (!x2apic(vlapic)) { - data &= 0xffffffff; - 
data |= (uint64_t)lapic->icr_hi << 32; - } - retval = lapic_process_icr(vlapic, data, retu); + lapic->icr_lo = data; + if (x2apic(vlapic)) + lapic->icr_hi = data >> 32; + retval = vlapic_icrlo_write_handler(vlapic, retu); break; case APIC_OFFSET_ICR_HI: - if (!x2apic(vlapic)) { - retval = 0; - lapic->icr_hi = data; - } + lapic->icr_hi = data; break; case APIC_OFFSET_CMCI_LVT: case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: - vlapic_set_lvt(vlapic, offset, data); + regptr = vlapic_get_lvtptr(vlapic, offset); + *regptr = data; + vlapic_lvt_write_handler(vlapic, offset); break; - case APIC_OFFSET_ICR: - vlapic_set_icr_timer(vlapic, data); + case APIC_OFFSET_TIMER_ICR: + lapic->icr_timer = data; + vlapic_icrtmr_write_handler(vlapic); break; - case APIC_OFFSET_DCR: - vlapic_set_dcr(vlapic, data); + case APIC_OFFSET_TIMER_DCR: + lapic->dcr_timer = data; + vlapic_dcr_write_handler(vlapic); break; case APIC_OFFSET_ESR: - vlapic_update_errors(vlapic); + vlapic_esr_write_handler(vlapic); break; case APIC_OFFSET_VER: case APIC_OFFSET_APR: @@ -1270,7 +1277,7 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu) case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: - case APIC_OFFSET_CCR: + case APIC_OFFSET_TIMER_CCR: default: // Read only. break; @@ -1279,14 +1286,41 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu) return (retval); } -struct vlapic * -vlapic_init(struct vm *vm, int vcpuid) +static void +vlapic_reset(struct vlapic *vlapic) { - struct vlapic *vlapic; + struct LAPIC *lapic; + + lapic = vlapic->apic_page; + bzero(lapic, sizeof(struct LAPIC)); - vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO); - vlapic->vm = vm; - vlapic->vcpuid = vcpuid; + lapic->id = vlapic_get_id(vlapic); + lapic->version = VLAPIC_VERSION; + lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT); + lapic->dfr = 0xffffffff; + lapic->svr = APIC_SVR_VECTOR; + vlapic_mask_lvts(vlapic); + vlapic_reset_tmr(vlapic); + + lapic->dcr_timer = 0; + vlapic_dcr_write_handler(vlapic); + + if (vlapic->vcpuid == 0) + vlapic->boot_state = BS_RUNNING; /* BSP */ + else + vlapic->boot_state = BS_INIT; /* AP */ + + vlapic->svr_last = lapic->svr; +} + +void +vlapic_init(struct vlapic *vlapic) +{ + KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized")); + KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU, + ("vlapic_init: vcpuid is not initialized")); + KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not " + "initialized")); /* * If the vlapic is configured in x2apic mode then it will be @@ -1300,12 +1334,10 @@ vlapic_init(struct vm *vm, int vcpuid) vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; - if (vcpuid == 0) + if (vlapic->vcpuid == 0) vlapic->msr_apicbase |= APICBASE_BSP; vlapic_reset(vlapic); - - return (vlapic); } void @@ -1313,7 +1345,6 @@ vlapic_cleanup(struct vlapic *vlapic) { callout_drain(&vlapic->callout); - free(vlapic, M_VLAPIC); } uint64_t @@ -1324,19 +1355,38 @@ vlapic_get_apicbase(struct vlapic *vlapic) } void -vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val) +vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new) { - int err; + struct LAPIC *lapic; enum x2apic_state state; + uint64_t old; + int err; err = vm_get_x2apic_state(vlapic->vm, vlapic->vcpuid, &state); if (err) panic("vlapic_set_apicbase: err %d fetching x2apic state", err); if (state == X2APIC_DISABLED) - val &= ~APICBASE_X2APIC; + new &= 
~APICBASE_X2APIC; + + old = vlapic->msr_apicbase; + vlapic->msr_apicbase = new; - vlapic->msr_apicbase = val; + /* + * If the vlapic is switching between xAPIC and x2APIC modes then + * reset the mode-dependent registers. + */ + if ((old ^ new) & APICBASE_X2APIC) { + lapic = vlapic->apic_page; + lapic->id = vlapic_get_id(vlapic); + if (x2apic(vlapic)) { + lapic->ldr = x2apic_ldr(vlapic); + lapic->dfr = 0; + } else { + lapic->ldr = 0; + lapic->dfr = 0xffffffff; + } + } } void @@ -1378,10 +1428,28 @@ vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, } } +void +vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum) +{ + /* + * Post an interrupt to the vcpu currently running on 'hostcpu'. + * + * This is done by leveraging features like Posted Interrupts (Intel) + * Doorbell MSR (AMD AVIC) that avoid a VM exit. + * + * If neither of these features are available then fallback to + * sending an IPI to 'hostcpu'. + */ + if (vlapic->ops.post_intr) + (*vlapic->ops.post_intr)(vlapic, hostcpu); + else + ipi_cpu(hostcpu, ipinum); +} + bool vlapic_enabled(struct vlapic *vlapic) { - struct LAPIC *lapic = &vlapic->apic; + struct LAPIC *lapic = vlapic->apic_page; if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 && (lapic->svr & APIC_SVR_ENABLE) != 0) @@ -1389,3 +1457,62 @@ vlapic_enabled(struct vlapic *vlapic) else return (false); } + +static void +vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level) +{ + struct LAPIC *lapic; + uint32_t *tmrptr, mask; + int idx; + + lapic = vlapic->apic_page; + tmrptr = &lapic->tmr0; + idx = (vector / 32) * 4; + mask = 1 << (vector % 32); + if (level) + tmrptr[idx] |= mask; + else + tmrptr[idx] &= ~mask; + + if (vlapic->ops.set_tmr != NULL) + (*vlapic->ops.set_tmr)(vlapic, vector, level); +} + +void +vlapic_reset_tmr(struct vlapic *vlapic) +{ + int vector; + + VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered"); + + for (vector = 0; vector <= 255; vector++) + vlapic_set_tmr(vlapic, vector, false); +} + +void +vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys, + int delmode, int vector) +{ + cpuset_t dmask; + bool lowprio; + + KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); + + /* + * A level trigger is valid only for fixed and lowprio delivery modes. 
+ */ + if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) { + VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for " + "delivery-mode %d", delmode); + return; + } + + lowprio = (delmode == APIC_DELMODE_LOWPRIO); + vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false); + + if (!CPU_ISSET(vlapic->vcpuid, &dmask)) + return; + + VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector); + vlapic_set_tmr(vlapic, vector, true); +} diff --git a/sys/amd64/vmm/io/vlapic.h b/sys/amd64/vmm/io/vlapic.h index 98f377e..d2fc6d9 100644 --- a/sys/amd64/vmm/io/vlapic.h +++ b/sys/amd64/vmm/io/vlapic.h @@ -30,74 +30,45 @@ #define _VLAPIC_H_ struct vm; - -/* - * Map of APIC Registers: Offset Description Access - */ -#define APIC_OFFSET_ID 0x20 // Local APIC ID R/W -#define APIC_OFFSET_VER 0x30 // Local APIC Version R -#define APIC_OFFSET_TPR 0x80 // Task Priority Register R/W -#define APIC_OFFSET_APR 0x90 // Arbitration Priority Register R -#define APIC_OFFSET_PPR 0xA0 // Processor Priority Register R -#define APIC_OFFSET_EOI 0xB0 // EOI Register W -#define APIC_OFFSET_RRR 0xC0 // Remote read R -#define APIC_OFFSET_LDR 0xD0 // Logical Destination R/W -#define APIC_OFFSET_DFR 0xE0 // Destination Format Register 0..27 R; 28..31 R/W -#define APIC_OFFSET_SVR 0xF0 // Spurious Interrupt Vector Reg. 0..3 R; 4..9 R/W -#define APIC_OFFSET_ISR0 0x100 // ISR 000-031 R -#define APIC_OFFSET_ISR1 0x110 // ISR 032-063 R -#define APIC_OFFSET_ISR2 0x120 // ISR 064-095 R -#define APIC_OFFSET_ISR3 0x130 // ISR 095-128 R -#define APIC_OFFSET_ISR4 0x140 // ISR 128-159 R -#define APIC_OFFSET_ISR5 0x150 // ISR 160-191 R -#define APIC_OFFSET_ISR6 0x160 // ISR 192-223 R -#define APIC_OFFSET_ISR7 0x170 // ISR 224-255 R -#define APIC_OFFSET_TMR0 0x180 // TMR 000-031 R -#define APIC_OFFSET_TMR1 0x190 // TMR 032-063 R -#define APIC_OFFSET_TMR2 0x1A0 // TMR 064-095 R -#define APIC_OFFSET_TMR3 0x1B0 // TMR 095-128 R -#define APIC_OFFSET_TMR4 0x1C0 // TMR 128-159 R -#define APIC_OFFSET_TMR5 0x1D0 // TMR 160-191 R -#define APIC_OFFSET_TMR6 0x1E0 // TMR 192-223 R -#define APIC_OFFSET_TMR7 0x1F0 // TMR 224-255 R -#define APIC_OFFSET_IRR0 0x200 // IRR 000-031 R -#define APIC_OFFSET_IRR1 0x210 // IRR 032-063 R -#define APIC_OFFSET_IRR2 0x220 // IRR 064-095 R -#define APIC_OFFSET_IRR3 0x230 // IRR 095-128 R -#define APIC_OFFSET_IRR4 0x240 // IRR 128-159 R -#define APIC_OFFSET_IRR5 0x250 // IRR 160-191 R -#define APIC_OFFSET_IRR6 0x260 // IRR 192-223 R -#define APIC_OFFSET_IRR7 0x270 // IRR 224-255 R -#define APIC_OFFSET_ESR 0x280 // Error Status Register R -#define APIC_OFFSET_CMCI_LVT 0x2F0 // Local Vector Table (CMCI) R/W -#define APIC_OFFSET_ICR_LOW 0x300 // Interrupt Command Reg. (0-31) R/W -#define APIC_OFFSET_ICR_HI 0x310 // Interrupt Command Reg. (32-63) R/W -#define APIC_OFFSET_TIMER_LVT 0x320 // Local Vector Table (Timer) R/W -#define APIC_OFFSET_THERM_LVT 0x330 // Local Vector Table (Thermal) R/W (PIV+) -#define APIC_OFFSET_PERF_LVT 0x340 // Local Vector Table (Performance) R/W (P6+) -#define APIC_OFFSET_LINT0_LVT 0x350 // Local Vector Table (LINT0) R/W -#define APIC_OFFSET_LINT1_LVT 0x360 // Local Vector Table (LINT1) R/W -#define APIC_OFFSET_ERROR_LVT 0x370 // Local Vector Table (ERROR) R/W -#define APIC_OFFSET_ICR 0x380 // Initial Count Reg. for Timer R/W -#define APIC_OFFSET_CCR 0x390 // Current Count of Timer R -#define APIC_OFFSET_DCR 0x3E0 // Timer Divide Configuration Reg. R/W - -/* - * 16 priority levels with at most one vector injected per level. 
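
(The removed sizing comment above reappears in vlapic_priv.h later in this diff. Condensed, the stack discipline it describes looks like the sketch below; the field names mirror the real ones, but the helpers are hypothetical and bounds checks are omitted.)

#include <stdint.h>

static uint8_t isrvec_stk[16 + 1];	/* 16 classes + slot 0 sentinel */
static int isrvec_stk_top;		/* slot 0 always holds vector 0 */

static void
isrvec_push_sketch(int vector)		/* on the IRR -> ISR transition */
{
	isrvec_stk[++isrvec_stk_top] = vector;
}

static int
isrvec_pop_sketch(void)			/* on EOI */
{
	return (isrvec_stk[isrvec_stk_top--]);
}
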
- */ -#define ISRVEC_STK_SIZE (16 + 1) - enum x2apic_state; -struct vlapic *vlapic_init(struct vm *vm, int vcpuid); -void vlapic_cleanup(struct vlapic *vlapic); int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu); int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu); -int vlapic_pending_intr(struct vlapic *vlapic); + +/* + * Returns 0 if there is no eligible vector that can be delivered to the + * guest at this time and non-zero otherwise. + * + * If an eligible vector number is found and 'vecptr' is not NULL then it will + * be stored in the location pointed to by 'vecptr'. + * + * Note that the vector does not automatically transition to the ISR as a + * result of calling this function. + */ +int vlapic_pending_intr(struct vlapic *vlapic, int *vecptr); + +/* + * Transition 'vector' from IRR to ISR. This function is called with the + * vector returned by 'vlapic_pending_intr()' when the guest is able to + * accept this interrupt (i.e. RFLAGS.IF = 1 and no conditions exist that + * block interrupt delivery). + */ void vlapic_intr_accepted(struct vlapic *vlapic, int vector); -void vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level); + +/* + * Returns 1 if the vcpu needs to be notified of the interrupt and 0 otherwise. + */ +int vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level); + +/* + * Post an interrupt to the vcpu running on 'hostcpu'. This will use a + * hardware assist if available (e.g. Posted Interrupt) or fall back to + * sending an 'ipinum' to interrupt the 'hostcpu'. + */ +void vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum); + void vlapic_set_error(struct vlapic *vlapic, uint32_t mask); void vlapic_fire_cmci(struct vlapic *vlapic); int vlapic_trigger_lvt(struct vlapic *vlapic, int vector); @@ -109,4 +80,26 @@ bool vlapic_enabled(struct vlapic *vlapic); void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, int delmode, int vec); + +/* Reset the trigger-mode bits for all vectors to be edge-triggered */ +void vlapic_reset_tmr(struct vlapic *vlapic); + +/* + * Set the trigger-mode bit associated with 'vector' to level-triggered if + * the (dest,phys,delmode) tuple resolves to an interrupt being delivered to + * this 'vlapic'. + */ +void vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys, + int delmode, int vector); + +/* APIC write handlers */ +void vlapic_id_write_handler(struct vlapic *vlapic); +void vlapic_ldr_write_handler(struct vlapic *vlapic); +void vlapic_dfr_write_handler(struct vlapic *vlapic); +void vlapic_svr_write_handler(struct vlapic *vlapic); +void vlapic_esr_write_handler(struct vlapic *vlapic); +int vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu); +void vlapic_icrtmr_write_handler(struct vlapic *vlapic); +void vlapic_dcr_write_handler(struct vlapic *vlapic); +void vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset); #endif /* _VLAPIC_H_ */ diff --git a/sys/amd64/vmm/io/vlapic_priv.h b/sys/amd64/vmm/io/vlapic_priv.h new file mode 100644 index 0000000..a4e96aa --- /dev/null +++ b/sys/amd64/vmm/io/vlapic_priv.h @@ -0,0 +1,185 @@ +/*- + * Copyright (c) 2013 Neel Natu <neel@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VLAPIC_PRIV_H_ +#define _VLAPIC_PRIV_H_ + +#include <x86/apicreg.h> + +/* + * APIC Register: Offset Description + */ +#define APIC_OFFSET_ID 0x20 /* Local APIC ID */ +#define APIC_OFFSET_VER 0x30 /* Local APIC Version */ +#define APIC_OFFSET_TPR 0x80 /* Task Priority Register */ +#define APIC_OFFSET_APR 0x90 /* Arbitration Priority */ +#define APIC_OFFSET_PPR 0xA0 /* Processor Priority Register */ +#define APIC_OFFSET_EOI 0xB0 /* EOI Register */ +#define APIC_OFFSET_RRR 0xC0 /* Remote read */ +#define APIC_OFFSET_LDR 0xD0 /* Logical Destination */ +#define APIC_OFFSET_DFR 0xE0 /* Destination Format Register */ +#define APIC_OFFSET_SVR 0xF0 /* Spurious Vector Register */ +#define APIC_OFFSET_ISR0 0x100 /* In Service Register */ +#define APIC_OFFSET_ISR1 0x110 +#define APIC_OFFSET_ISR2 0x120 +#define APIC_OFFSET_ISR3 0x130 +#define APIC_OFFSET_ISR4 0x140 +#define APIC_OFFSET_ISR5 0x150 +#define APIC_OFFSET_ISR6 0x160 +#define APIC_OFFSET_ISR7 0x170 +#define APIC_OFFSET_TMR0 0x180 /* Trigger Mode Register */ +#define APIC_OFFSET_TMR1 0x190 +#define APIC_OFFSET_TMR2 0x1A0 +#define APIC_OFFSET_TMR3 0x1B0 +#define APIC_OFFSET_TMR4 0x1C0 +#define APIC_OFFSET_TMR5 0x1D0 +#define APIC_OFFSET_TMR6 0x1E0 +#define APIC_OFFSET_TMR7 0x1F0 +#define APIC_OFFSET_IRR0 0x200 /* Interrupt Request Register */ +#define APIC_OFFSET_IRR1 0x210 +#define APIC_OFFSET_IRR2 0x220 +#define APIC_OFFSET_IRR3 0x230 +#define APIC_OFFSET_IRR4 0x240 +#define APIC_OFFSET_IRR5 0x250 +#define APIC_OFFSET_IRR6 0x260 +#define APIC_OFFSET_IRR7 0x270 +#define APIC_OFFSET_ESR 0x280 /* Error Status Register */ +#define APIC_OFFSET_CMCI_LVT 0x2F0 /* Local Vector Table (CMCI) */ +#define APIC_OFFSET_ICR_LOW 0x300 /* Interrupt Command Register */ +#define APIC_OFFSET_ICR_HI 0x310 +#define APIC_OFFSET_TIMER_LVT 0x320 /* Local Vector Table (Timer) */ +#define APIC_OFFSET_THERM_LVT 0x330 /* Local Vector Table (Thermal) */ +#define APIC_OFFSET_PERF_LVT 0x340 /* Local Vector Table (PMC) */ +#define APIC_OFFSET_LINT0_LVT 0x350 /* Local Vector Table (LINT0) */ +#define APIC_OFFSET_LINT1_LVT 0x360 /* Local Vector Table (LINT1) */ +#define APIC_OFFSET_ERROR_LVT 0x370 /* Local Vector Table (ERROR) */ +#define APIC_OFFSET_TIMER_ICR 0x380 /* Timer's Initial Count */ +#define APIC_OFFSET_TIMER_CCR 0x390 /* Timer's Current Count */ +#define APIC_OFFSET_TIMER_DCR 0x3E0 /* Timer's Divide Configuration */ + +#define 
VLAPIC_CTR0(vlapic, format) \ + VCPU_CTR0((vlapic)->vm, (vlapic)->vcpuid, format) + +#define VLAPIC_CTR1(vlapic, format, p1) \ + VCPU_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1) + +#define VLAPIC_CTR2(vlapic, format, p1, p2) \ + VCPU_CTR2((vlapic)->vm, (vlapic)->vcpuid, format, p1, p2) + +#define VLAPIC_CTR_IRR(vlapic, msg) \ +do { \ + uint32_t *irrptr = &(vlapic)->apic_page->irr0; \ + irrptr[0] = irrptr[0]; /* silence compiler */ \ + VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]); \ + VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]); \ + VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]); \ + VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]); \ + VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]); \ + VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]); \ + VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]); \ + VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]); \ +} while (0) + +#define VLAPIC_CTR_ISR(vlapic, msg) \ +do { \ + uint32_t *isrptr = &(vlapic)->apic_page->isr0; \ + isrptr[0] = isrptr[0]; /* silence compiler */ \ + VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]); \ + VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]); \ + VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]); \ + VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]); \ + VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]); \ + VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]); \ + VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]); \ + VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]); \ +} while (0) + +enum boot_state { + BS_INIT, + BS_SIPI, + BS_RUNNING +}; + +/* + * 16 priority levels with at most one vector injected per level. + */ +#define ISRVEC_STK_SIZE (16 + 1) + +#define VLAPIC_MAXLVT_INDEX APIC_LVT_CMCI + +struct vlapic; + +struct vlapic_ops { + int (*set_intr_ready)(struct vlapic *vlapic, int vector, bool level); + int (*pending_intr)(struct vlapic *vlapic, int *vecptr); + void (*intr_accepted)(struct vlapic *vlapic, int vector); + void (*post_intr)(struct vlapic *vlapic, int hostcpu); + void (*set_tmr)(struct vlapic *vlapic, int vector, bool level); +}; + +struct vlapic { + struct vm *vm; + int vcpuid; + struct LAPIC *apic_page; + struct vlapic_ops ops; + + uint32_t esr_pending; + int esr_firing; + + struct callout callout; /* vlapic timer */ + struct bintime timer_fire_bt; /* callout expiry time */ + struct bintime timer_freq_bt; /* timer frequency */ + struct bintime timer_period_bt; /* timer period */ + struct mtx timer_mtx; + + /* + * The 'isrvec_stk' is a stack of vectors injected by the local apic. + * A vector is popped from the stack when the processor does an EOI. + * The vector on the top of the stack is used to compute the + * Processor Priority in conjunction with the TPR. + */ + uint8_t isrvec_stk[ISRVEC_STK_SIZE]; + int isrvec_stk_top; + + uint64_t msr_apicbase; + enum boot_state boot_state; + + /* + * Copies of some registers in the virtual APIC page. We do this for + * a couple of different reasons: + * - to be able to detect what changed (e.g. svr_last) + * - to maintain a coherent snapshot of the register (e.g. 
lvt_last) + */ + uint32_t svr_last; + uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1]; +}; + +void vlapic_init(struct vlapic *vlapic); +void vlapic_cleanup(struct vlapic *vlapic); + +#endif /* _VLAPIC_PRIV_H_ */ diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index f471218b..2c86068 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -124,19 +124,25 @@ struct vm { * An active vcpu is one that has been started implicitly (BSP) or * explicitly (AP) by sending it a startup ipi. */ - cpuset_t active_cpus; + volatile cpuset_t active_cpus; + + struct mtx rendezvous_mtx; + cpuset_t rendezvous_req_cpus; + cpuset_t rendezvous_done_cpus; + void *rendezvous_arg; + vm_rendezvous_func_t rendezvous_func; }; static int vmm_initialized; static struct vmm_ops *ops; -#define VMM_INIT() (ops != NULL ? (*ops->init)() : 0) +#define VMM_INIT(num) (ops != NULL ? (*ops->init)(num) : 0) #define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) #define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0) #define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL) -#define VMRUN(vmi, vcpu, rip, pmap) \ - (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap) : ENXIO) +#define VMRUN(vmi, vcpu, rip, pmap, rptr) \ + (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr) : ENXIO) #define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) #define VMSPACE_ALLOC(min, max) \ (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL) @@ -156,6 +162,10 @@ static struct vmm_ops *ops; (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) #define VMSETCAP(vmi, vcpu, num, val) \ (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) +#define VLAPIC_INIT(vmi, vcpu) \ + (ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL) +#define VLAPIC_CLEANUP(vmi, vlapic) \ + (ops != NULL ? 
(*ops->vlapic_cleanup)(vmi, vlapic) : NULL) #define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) #define fpu_stop_emulating() clts() @@ -166,10 +176,20 @@ CTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */ /* statistics */ static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); +SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); + +static int vmm_ipinum; +SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, + "IPI vector used for vcpu notifications"); + +static void vm_deactivate_cpu(struct vm *vm, int vcpuid); + static void -vcpu_cleanup(struct vcpu *vcpu) +vcpu_cleanup(struct vm *vm, int i) { - vlapic_cleanup(vcpu->vlapic); + struct vcpu *vcpu = &vm->vcpu[i]; + + VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic); vmm_stat_free(vcpu->stats); fpu_save_area_free(vcpu->guestfpu); } @@ -184,7 +204,7 @@ vcpu_init(struct vm *vm, uint32_t vcpu_id) vcpu_lock_init(vcpu); vcpu->hostcpu = NOCPU; vcpu->vcpuid = vcpu_id; - vcpu->vlapic = vlapic_init(vm, vcpu_id); + vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED); vcpu->guestfpu = fpu_save_area_alloc(); fpu_save_area_reset(vcpu->guestfpu); @@ -216,7 +236,10 @@ vmm_init(void) int error; vmm_host_state_init(); - vmm_ipi_init(); + + vmm_ipinum = vmm_ipi_alloc(); + if (vmm_ipinum == 0) + vmm_ipinum = IPI_AST; error = vmm_mem_init(); if (error) @@ -232,7 +255,7 @@ vmm_init(void) vmm_msr_init(); vmm_resume_p = vmm_resume; - return (VMM_INIT()); + return (VMM_INIT(vmm_ipinum)); } static int @@ -253,7 +276,8 @@ vmm_handler(module_t mod, int what, void *arg) if (error == 0) { vmm_resume_p = NULL; iommu_cleanup(); - vmm_ipi_cleanup(); + if (vmm_ipinum != IPI_AST) + vmm_ipi_free(vmm_ipinum); error = VMM_CLEANUP(); /* * Something bad happened - prevent new @@ -288,8 +312,6 @@ static moduledata_t vmm_kmod = { DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); MODULE_VERSION(vmm, 1); -SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); - int vm_create(const char *name, struct vm **retvm) { @@ -315,6 +337,8 @@ vm_create(const char *name, struct vm **retvm) vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); strcpy(vm->name, name); + vm->vmspace = vmspace; + mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); vm->cookie = VMINIT(vm, vmspace_pmap(vmspace)); vm->vioapic = vioapic_init(vm); vm->vhpet = vhpet_init(vm); @@ -325,7 +349,6 @@ vm_create(const char *name, struct vm **retvm) } vm_activate_cpu(vm, BSP); - vm->vmspace = vmspace; *retvm = vm; return (0); @@ -360,7 +383,7 @@ vm_destroy(struct vm *vm) vm->num_mem_segs = 0; for (i = 0; i < VM_MAXCPU; i++) - vcpu_cleanup(&vm->vcpu[i]); + vcpu_cleanup(vm, i); VMSPACE_FREE(vm->vmspace); @@ -866,6 +889,63 @@ vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) panic("Error %d setting state to %d", error, newstate); } +static void +vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func) +{ + + KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked")); + + /* + * Update 'rendezvous_func' and execute a write memory barrier to + * ensure that it is visible across all host cpus. This is not needed + * for correctness but it does ensure that all the vcpus will notice + * that the rendezvous is requested immediately. 
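
(As vm_handle_rendezvous() below shows, a rendezvous callback has the vm_rendezvous_func_t shape and is invoked once on every targeted vcpu; vioapic_update_tmr() at the top of this diff is the real user. The skeleton below is illustrative.)

struct vm;

static void
rendezvous_cb_sketch(struct vm *vm, int vcpuid, void *arg)
{
	/*
	 * Runs while the remaining vcpus in 'rendezvous_req_cpus'
	 * are stalled waiting for completion, so it must never sleep.
	 */
}
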
+ */ + vm->rendezvous_func = func; + wmb(); +} + +#define RENDEZVOUS_CTR0(vm, vcpuid, fmt) \ + do { \ + if (vcpuid >= 0) \ + VCPU_CTR0(vm, vcpuid, fmt); \ + else \ + VM_CTR0(vm, fmt); \ + } while (0) + +static void +vm_handle_rendezvous(struct vm *vm, int vcpuid) +{ + + KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), + ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid)); + + mtx_lock(&vm->rendezvous_mtx); + while (vm->rendezvous_func != NULL) { + /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ + CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus); + + if (vcpuid != -1 && + CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && + !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { + VCPU_CTR0(vm, vcpuid, "Calling rendezvous func"); + (*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg); + CPU_SET(vcpuid, &vm->rendezvous_done_cpus); + } + if (CPU_CMP(&vm->rendezvous_req_cpus, + &vm->rendezvous_done_cpus) == 0) { + VCPU_CTR0(vm, vcpuid, "Rendezvous completed"); + vm_set_rendezvous_func(vm, NULL); + wakeup(&vm->rendezvous_func); + break; + } + RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion"); + mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, + "vmrndv", 0); + } + mtx_unlock(&vm->rendezvous_mtx); +} + /* * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run. */ @@ -874,9 +954,10 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) { struct vm_exit *vmexit; struct vcpu *vcpu; - int t, timo; + int t, timo, spindown; vcpu = &vm->vcpu[vcpuid]; + spindown = 0; vcpu_lock(vcpu); @@ -888,7 +969,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) * returned from VMRUN() and before we grabbed the vcpu lock. */ if (!vm_nmi_pending(vm, vcpuid) && - (intr_disabled || vlapic_pending_intr(vcpu->vlapic) < 0)) { + (intr_disabled || !vlapic_pending_intr(vcpu->vlapic, NULL))) { t = ticks; vcpu_require_state_locked(vcpu, VCPU_SLEEPING); if (vlapic_enabled(vcpu->vlapic)) { @@ -903,16 +984,25 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) * Spindown the vcpu if the apic is disabled and it * had entered the halted state. */ - *retu = true; - vmexit = vm_exitinfo(vm, vcpuid); - vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU; - VCPU_CTR0(vm, vcpuid, "spinning down cpu"); + spindown = 1; } vcpu_require_state_locked(vcpu, VCPU_FROZEN); vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); } vcpu_unlock(vcpu); + /* + * Since 'vm_deactivate_cpu()' grabs a sleep mutex we must call it + * outside the confines of the vcpu spinlock. 
+ */ + if (spindown) { + *retu = true; + vmexit = vm_exitinfo(vm, vcpuid); + vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU; + vm_deactivate_cpu(vm, vcpuid); + VCPU_CTR0(vm, vcpuid, "spinning down cpu"); + } + return (0); } @@ -1042,7 +1132,7 @@ restart: vcpu_require_state(vm, vcpuid, VCPU_RUNNING); vcpu->hostcpu = curcpu; - error = VMRUN(vm->cookie, vcpuid, rip, pmap); + error = VMRUN(vm->cookie, vcpuid, rip, pmap, &vm->rendezvous_func); vcpu->hostcpu = NOCPU; vcpu_require_state(vm, vcpuid, VCPU_FROZEN); @@ -1056,6 +1146,14 @@ restart: if (error == 0) { retu = false; switch (vme->exitcode) { + case VM_EXITCODE_IOAPIC_EOI: + vioapic_process_eoi(vm, vcpuid, + vme->u.ioapic_eoi.vector); + break; + case VM_EXITCODE_RENDEZVOUS: + vm_handle_rendezvous(vm, vcpuid); + error = 0; + break; case VM_EXITCODE_HLT: intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0); error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu); @@ -1111,7 +1209,7 @@ vm_inject_nmi(struct vm *vm, int vcpuid) vcpu = &vm->vcpu[vcpuid]; vcpu->nmi_pending = 1; - vcpu_notify_event(vm, vcpuid); + vcpu_notify_event(vm, vcpuid, false); return (0); } @@ -1286,8 +1384,37 @@ void vm_activate_cpu(struct vm *vm, int vcpuid) { - if (vcpuid >= 0 && vcpuid < VM_MAXCPU) - CPU_SET(vcpuid, &vm->active_cpus); + KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, + ("vm_activate_cpu: invalid vcpuid %d", vcpuid)); + KASSERT(!CPU_ISSET(vcpuid, &vm->active_cpus), + ("vm_activate_cpu: vcpuid %d is already active", vcpuid)); + + VCPU_CTR0(vm, vcpuid, "activated"); + CPU_SET_ATOMIC(vcpuid, &vm->active_cpus); +} + +static void +vm_deactivate_cpu(struct vm *vm, int vcpuid) +{ + + KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, + ("vm_deactivate_cpu: invalid vcpuid %d", vcpuid)); + KASSERT(CPU_ISSET(vcpuid, &vm->active_cpus), + ("vm_deactivate_cpu: vcpuid %d is not active", vcpuid)); + + VCPU_CTR0(vm, vcpuid, "deactivated"); + CPU_CLR_ATOMIC(vcpuid, &vm->active_cpus); + + /* + * If a vcpu rendezvous is in progress then it could be blocked + * on 'vcpuid' - unblock it before disappearing forever. + */ + mtx_lock(&vm->rendezvous_mtx); + if (vm->rendezvous_func != NULL) { + VCPU_CTR0(vm, vcpuid, "unblock rendezvous after deactivation"); + wakeup(&vm->rendezvous_func); + } + mtx_unlock(&vm->rendezvous_mtx); } cpuset_t @@ -1339,7 +1466,7 @@ vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) * to the host_cpu to cause the vcpu to trap into the hypervisor. 
*/ void -vcpu_notify_event(struct vm *vm, int vcpuid) +vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr) { int hostcpu; struct vcpu *vcpu; @@ -1354,8 +1481,13 @@ vcpu_notify_event(struct vm *vm, int vcpuid) } else { if (vcpu->state != VCPU_RUNNING) panic("invalid vcpu state %d", vcpu->state); - if (hostcpu != curcpu) - ipi_cpu(hostcpu, vmm_ipinum); + if (hostcpu != curcpu) { + if (lapic_intr) + vlapic_post_intr(vcpu->vlapic, hostcpu, + vmm_ipinum); + else + ipi_cpu(hostcpu, vmm_ipinum); + } } vcpu_unlock(vcpu); } @@ -1375,3 +1507,51 @@ vm_apicid2vcpuid(struct vm *vm, int apicid) */ return (apicid); } + +void +vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, + vm_rendezvous_func_t func, void *arg) +{ + int i; + + /* + * Enforce that this function is called without any locks + */ + WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); + KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), + ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid)); + +restart: + mtx_lock(&vm->rendezvous_mtx); + if (vm->rendezvous_func != NULL) { + /* + * If a rendezvous is already in progress then we need to + * call the rendezvous handler in case this 'vcpuid' is one + * of the targets of the rendezvous. + */ + RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress"); + mtx_unlock(&vm->rendezvous_mtx); + vm_handle_rendezvous(vm, vcpuid); + goto restart; + } + KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous " + "rendezvous is still in progress")); + + RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous"); + vm->rendezvous_req_cpus = dest; + CPU_ZERO(&vm->rendezvous_done_cpus); + vm->rendezvous_arg = arg; + vm_set_rendezvous_func(vm, func); + mtx_unlock(&vm->rendezvous_mtx); + + /* + * Wake up any sleeping vcpus and trigger a VM-exit in any running + * vcpus so they handle the rendezvous as soon as possible. + */ + for (i = 0; i < VM_MAXCPU; i++) { + if (CPU_ISSET(i, &dest)) + vcpu_notify_event(vm, i, false); + } + + vm_handle_rendezvous(vm, vcpuid); +} diff --git a/sys/amd64/vmm/vmm_ipi.c b/sys/amd64/vmm/vmm_ipi.c index 643d326..1765284 100644 --- a/sys/amd64/vmm/vmm_ipi.c +++ b/sys/amd64/vmm/vmm_ipi.c @@ -44,15 +44,10 @@ __FBSDID("$FreeBSD$"); extern inthand_t IDTVEC(rsvd), IDTVEC(justreturn); -/* - * The default is to use the IPI_AST to interrupt a vcpu. 
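
(A usage sketch of vm_smp_rendezvous(), condensed from the vioapic call site at the top of this diff: no locks may be held, and 'vcpuid' is the calling vcpu or -1 outside vcpu context.)

	cpuset_t allvcpus;

	allvcpus = vm_active_cpus(vm);
	vm_smp_rendezvous(vm, vcpuid, allvcpus, vioapic_update_tmr, NULL);
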
- */ -int vmm_ipinum = IPI_AST; - CTASSERT(APIC_SPURIOUS_INT == 255); -void -vmm_ipi_init(void) +int +vmm_ipi_alloc(void) { int idx; uintptr_t func; @@ -72,22 +67,27 @@ vmm_ipi_init(void) ip = &idt[idx]; func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); if (func == (uintptr_t)&IDTVEC(rsvd)) { - vmm_ipinum = idx; - setidt(vmm_ipinum, IDTVEC(justreturn), SDT_SYSIGT, + setidt(idx , IDTVEC(justreturn), SDT_SYSIGT, SEL_KPL, 0); - break; + return (idx); } } - - if (vmm_ipinum != IPI_AST && bootverbose) { - printf("vmm_ipi_init: installing ipi handler to interrupt " - "vcpus at vector %d\n", vmm_ipinum); - } + return (0); } void -vmm_ipi_cleanup(void) +vmm_ipi_free(int ipinum) { - if (vmm_ipinum != IPI_AST) - setidt(vmm_ipinum, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); + uintptr_t func; + struct gate_descriptor *ip; + + KASSERT(ipinum >= APIC_IPI_INTS && ipinum < APIC_SPURIOUS_INT, + ("invalid ipi %d", ipinum)); + + ip = &idt[ipinum]; + func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); + KASSERT(func == (uintptr_t)&IDTVEC(justreturn), + ("invalid ipi %d", ipinum)); + + setidt(ipinum, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); } diff --git a/sys/amd64/vmm/vmm_ipi.h b/sys/amd64/vmm/vmm_ipi.h index 91552e3..679d183 100644 --- a/sys/amd64/vmm/vmm_ipi.h +++ b/sys/amd64/vmm/vmm_ipi.h @@ -29,11 +29,7 @@ #ifndef _VMM_IPI_H_ #define _VMM_IPI_H_ -struct vm; - -extern int vmm_ipinum; - -void vmm_ipi_init(void); -void vmm_ipi_cleanup(void); +int vmm_ipi_alloc(void); +void vmm_ipi_free(int num); #endif diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c index 8d915cd..47e04da 100644 --- a/sys/amd64/vmm/vmm_lapic.c +++ b/sys/amd64/vmm/vmm_lapic.c @@ -51,26 +51,6 @@ __FBSDID("$FreeBSD$"); #define MSI_X86_ADDR_LOG 0x00000004 /* Destination Mode */ int -lapic_pending_intr(struct vm *vm, int cpu) -{ - struct vlapic *vlapic; - - vlapic = vm_lapic(vm, cpu); - - return (vlapic_pending_intr(vlapic)); -} - -void -lapic_intr_accepted(struct vm *vm, int cpu, int vector) -{ - struct vlapic *vlapic; - - vlapic = vm_lapic(vm, cpu); - - vlapic_intr_accepted(vlapic, vector); -} - -int lapic_set_intr(struct vm *vm, int cpu, int vector, bool level) { struct vlapic *vlapic; @@ -82,10 +62,8 @@ lapic_set_intr(struct vm *vm, int cpu, int vector, bool level) return (EINVAL); vlapic = vm_lapic(vm, cpu); - vlapic_set_intr_ready(vlapic, vector, level); - - vcpu_notify_event(vm, cpu); - + if (vlapic_set_intr_ready(vlapic, vector, level)) + vcpu_notify_event(vm, cpu, true); return (0); } diff --git a/sys/amd64/vmm/vmm_lapic.h b/sys/amd64/vmm/vmm_lapic.h index c5c95aa..88fa948 100644 --- a/sys/amd64/vmm/vmm_lapic.h +++ b/sys/amd64/vmm/vmm_lapic.h @@ -43,26 +43,6 @@ int lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size, void *arg); /* - * Returns a vector between 32 and 255 if an interrupt is pending in the - * IRR that can be delivered based on the current state of ISR and TPR. - * - * Note that the vector does not automatically transition to the ISR as a - * result of calling this function. - * - * Returns -1 if there is no eligible vector that can be delivered to the - * guest at this time. - */ -int lapic_pending_intr(struct vm *vm, int cpu); - -/* - * Transition 'vector' from IRR to ISR. This function is called with the - * vector returned by 'lapic_pending_intr()' when the guest is able to - * accept this interrupt (i.e. RFLAGS.IF = 1 and no conditions exist that - * block interrupt delivery). 
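
(The alloc/free pair above replaces the old fixed IPI_AST scheme. Its lifecycle, as wired into vmm_init() and vmm_handler() earlier in this diff, reduces to the following.)

	vmm_ipinum = vmm_ipi_alloc();	/* claim an unused IDT vector */
	if (vmm_ipinum == 0)
		vmm_ipinum = IPI_AST;	/* none free: fall back to IPI_AST */

	/* ... and on module unload ... */
	if (vmm_ipinum != IPI_AST)
		vmm_ipi_free(vmm_ipinum);
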
- */ -void lapic_intr_accepted(struct vm *vm, int cpu, int vector); - -/* * Signals to the LAPIC that an interrupt at 'vector' needs to be generated * to the 'cpu', the state is recorded in IRR. */ diff --git a/sys/amd64/vmm/vmm_stat.c b/sys/amd64/vmm/vmm_stat.c index 781fda5..0951e1e 100644 --- a/sys/amd64/vmm/vmm_stat.c +++ b/sys/amd64/vmm/vmm_stat.c @@ -146,7 +146,9 @@ VMM_STAT(VMEXIT_INTR_WINDOW, "vm exits due to interrupt window opening"); VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening"); VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted"); VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted"); -VMM_STAT(VMEXIT_EPT_FAULT, "vm exits due to nested page fault"); +VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault"); +VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation"); VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason"); VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit"); VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace"); +VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit"); diff --git a/sys/amd64/vmm/vmm_stat.h b/sys/amd64/vmm/vmm_stat.h index bc58113..0190a63 100644 --- a/sys/amd64/vmm/vmm_stat.h +++ b/sys/amd64/vmm/vmm_stat.h @@ -116,8 +116,10 @@ VMM_STAT_DECLARE(VMEXIT_INTR_WINDOW); VMM_STAT_DECLARE(VMEXIT_NMI_WINDOW); VMM_STAT_DECLARE(VMEXIT_INOUT); VMM_STAT_DECLARE(VMEXIT_CPUID); -VMM_STAT_DECLARE(VMEXIT_EPT_FAULT); +VMM_STAT_DECLARE(VMEXIT_NESTED_FAULT); +VMM_STAT_DECLARE(VMEXIT_INST_EMUL); VMM_STAT_DECLARE(VMEXIT_UNKNOWN); VMM_STAT_DECLARE(VMEXIT_ASTPENDING); VMM_STAT_DECLARE(VMEXIT_USERSPACE); +VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS); #endif
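
(To close, the split between vlapic_pending_intr() and vlapic_intr_accepted() documented in vlapic.h above is consumed roughly as sketched here; the guard function is hypothetical and stands in for the RFLAGS.IF and interrupt-shadow checks done by the real backends.)

	int vector;

	if (vlapic_pending_intr(vlapic, &vector)) {
		if (guest_can_take_interrupt_sketch()) {
			/* inject 'vector', then move it from IRR to ISR */
			vlapic_intr_accepted(vlapic, vector);
		}
	}
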