Diffstat (limited to 'sys/amd64/vmm/intel/vmx.c')
-rw-r--r--  sys/amd64/vmm/intel/vmx.c | 152
1 file changed, 142 insertions(+), 10 deletions(-)
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index c4ad484..54efe3d 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -114,6 +114,9 @@ __FBSDID("$FreeBSD$");
 #define guest_msr_rw(vmx, msr) \
         msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_RW)
 
+#define guest_msr_ro(vmx, msr) \
+        msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_READ)
+
 #define HANDLED         1
 #define UNHANDLED       0
 
@@ -302,6 +305,54 @@ exit_reason_to_str(int reason)
 }
 #endif  /* KTR */
 
+static int
+vmx_allow_x2apic_msrs(struct vmx *vmx)
+{
+        int i, error;
+
+        error = 0;
+
+        /*
+         * Allow readonly access to the following x2APIC MSRs from the guest.
+         */
+        error += guest_msr_ro(vmx, MSR_APIC_ID);
+        error += guest_msr_ro(vmx, MSR_APIC_VERSION);
+        error += guest_msr_ro(vmx, MSR_APIC_LDR);
+        error += guest_msr_ro(vmx, MSR_APIC_SVR);
+
+        for (i = 0; i < 8; i++)
+                error += guest_msr_ro(vmx, MSR_APIC_ISR0 + i);
+
+        for (i = 0; i < 8; i++)
+                error += guest_msr_ro(vmx, MSR_APIC_TMR0 + i);
+
+        for (i = 0; i < 8; i++)
+                error += guest_msr_ro(vmx, MSR_APIC_IRR0 + i);
+
+        error += guest_msr_ro(vmx, MSR_APIC_ESR);
+        error += guest_msr_ro(vmx, MSR_APIC_LVT_TIMER);
+        error += guest_msr_ro(vmx, MSR_APIC_LVT_THERMAL);
+        error += guest_msr_ro(vmx, MSR_APIC_LVT_PCINT);
+        error += guest_msr_ro(vmx, MSR_APIC_LVT_LINT0);
+        error += guest_msr_ro(vmx, MSR_APIC_LVT_LINT1);
+        error += guest_msr_ro(vmx, MSR_APIC_LVT_ERROR);
+        error += guest_msr_ro(vmx, MSR_APIC_ICR_TIMER);
+        error += guest_msr_ro(vmx, MSR_APIC_DCR_TIMER);
+        error += guest_msr_ro(vmx, MSR_APIC_ICR);
+
+        /*
+         * Allow TPR, EOI and SELF_IPI MSRs to be read and written by the guest.
+         *
+         * These registers get special treatment described in the section
+         * "Virtualizing MSR-Based APIC Accesses".
+         */
+        error += guest_msr_rw(vmx, MSR_APIC_TPR);
+        error += guest_msr_rw(vmx, MSR_APIC_EOI);
+        error += guest_msr_rw(vmx, MSR_APIC_SELF_IPI);
+
+        return (error);
+}
+
 u_long
 vmx_fix_cr0(u_long cr0)
 {
@@ -1499,17 +1550,53 @@ ept_emulation_fault(uint64_t ept_qual)
         return (TRUE);
 }
 
+static __inline int
+apic_access_virtualization(struct vmx *vmx, int vcpuid)
+{
+        uint32_t proc_ctls2;
+
+        proc_ctls2 = vmx->cap[vcpuid].proc_ctls2;
+        return ((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) ? 1 : 0);
+}
+
+static __inline int
+x2apic_virtualization(struct vmx *vmx, int vcpuid)
+{
+        uint32_t proc_ctls2;
+
+        proc_ctls2 = vmx->cap[vcpuid].proc_ctls2;
+        return ((proc_ctls2 & PROCBASED2_VIRTUALIZE_X2APIC_MODE) ? 1 : 0);
+}
+
 static int
-vmx_handle_apic_write(struct vlapic *vlapic, uint64_t qual)
+vmx_handle_apic_write(struct vmx *vmx, int vcpuid, struct vlapic *vlapic,
+    uint64_t qual)
 {
         int error, handled, offset;
+        uint32_t *apic_regs, vector;
         bool retu;
 
-        if (!virtual_interrupt_delivery)
-                return (UNHANDLED);
-
         handled = HANDLED;
         offset = APIC_WRITE_OFFSET(qual);
+
+        if (!apic_access_virtualization(vmx, vcpuid)) {
+                /*
+                 * In general there should not be any APIC write VM-exits
+                 * unless APIC-access virtualization is enabled.
+                 *
+                 * However self-IPI virtualization can legitimately trigger
+                 * an APIC-write VM-exit so treat it specially.
+                 */
+                if (x2apic_virtualization(vmx, vcpuid) &&
+                    offset == APIC_OFFSET_SELF_IPI) {
+                        apic_regs = (uint32_t *)(vlapic->apic_page);
+                        vector = apic_regs[APIC_OFFSET_SELF_IPI / 4];
+                        vlapic_self_ipi_handler(vlapic, vector);
+                        return (HANDLED);
+                } else
+                        return (UNHANDLED);
+        }
+
         switch (offset) {
         case APIC_OFFSET_ID:
                 vlapic_id_write_handler(vlapic);
@@ -1550,10 +1637,10 @@ vmx_handle_apic_write(struct vlapic *vlapic, uint64_t qual)
 }
 
 static bool
-apic_access_fault(uint64_t gpa)
+apic_access_fault(struct vmx *vmx, int vcpuid, uint64_t gpa)
 {
 
-        if (virtual_interrupt_delivery &&
+        if (apic_access_virtualization(vmx, vcpuid) &&
             (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE))
                 return (true);
         else
@@ -1566,7 +1653,7 @@ vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
         uint64_t qual;
         int access_type, offset, allowed;
 
-        if (!virtual_interrupt_delivery)
+        if (!apic_access_virtualization(vmx, vcpuid))
                 return (UNHANDLED);
 
         qual = vmexit->u.vmx.exit_qualification;
@@ -1832,7 +1919,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
                  * this must be an instruction that accesses MMIO space.
                  */
                 gpa = vmcs_gpa();
-                if (vm_mem_allocated(vmx->vm, gpa) || apic_access_fault(gpa)) {
+                if (vm_mem_allocated(vmx->vm, gpa) ||
+                    apic_access_fault(vmx, vcpu, gpa)) {
                         vmexit->exitcode = VM_EXITCODE_PAGING;
                         vmexit->u.paging.gpa = gpa;
                         vmexit->u.paging.fault_type = ept_fault_type(qual);
@@ -1873,7 +1961,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
                  */
                 vmexit->inst_length = 0;
                 vlapic = vm_lapic(vmx->vm, vcpu);
-                handled = vmx_handle_apic_write(vlapic, qual);
+                handled = vmx_handle_apic_write(vmx, vcpu, vlapic, qual);
                 break;
         case EXIT_REASON_XSETBV:
                 handled = vmx_emulate_xsetbv(vmx, vcpu, vmexit);
@@ -2119,7 +2207,7 @@ vmx_vmcleanup(void *arg)
         int i, error;
         struct vmx *vmx = arg;
 
-        if (virtual_interrupt_delivery)
+        if (apic_access_virtualization(vmx, 0))
                 vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE);
 
         for (i = 0; i < VM_MAXCPU; i++)
@@ -2571,6 +2659,49 @@ vmx_set_tmr(struct vlapic *vlapic, int vector, bool level)
 }
 
 static void
+vmx_enable_x2apic_mode(struct vlapic *vlapic)
+{
+        struct vmx *vmx;
+        struct vmcs *vmcs;
+        uint32_t proc_ctls2;
+        int vcpuid, error;
+
+        vcpuid = vlapic->vcpuid;
+        vmx = ((struct vlapic_vtx *)vlapic)->vmx;
+        vmcs = &vmx->vmcs[vcpuid];
+
+        proc_ctls2 = vmx->cap[vcpuid].proc_ctls2;
+        KASSERT((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) != 0,
+            ("%s: invalid proc_ctls2 %#x", __func__, proc_ctls2));
+
+        proc_ctls2 &= ~PROCBASED2_VIRTUALIZE_APIC_ACCESSES;
+        proc_ctls2 |= PROCBASED2_VIRTUALIZE_X2APIC_MODE;
+        vmx->cap[vcpuid].proc_ctls2 = proc_ctls2;
+
+        VMPTRLD(vmcs);
+        vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc_ctls2);
+        VMCLEAR(vmcs);
+
+        if (vlapic->vcpuid == 0) {
+                /*
+                 * The nested page table mappings are shared by all vcpus
+                 * so unmap the APIC access page just once.
+                 */
+                error = vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE);
+                KASSERT(error == 0, ("%s: vm_unmap_mmio error %d",
+                    __func__, error));
+
+                /*
+                 * The MSR bitmap is shared by all vcpus so modify it only
+                 * once in the context of vcpu 0.
+                 */
+                error = vmx_allow_x2apic_msrs(vmx);
+                KASSERT(error == 0, ("%s: vmx_allow_x2apic_msrs error %d",
+                    __func__, error));
+        }
+}
+
+static void
 vmx_post_intr(struct vlapic *vlapic, int hostcpu)
 {
 
@@ -2675,6 +2806,7 @@ vmx_vlapic_init(void *arg, int vcpuid)
                 vlapic->ops.pending_intr = vmx_pending_intr;
                 vlapic->ops.intr_accepted = vmx_intr_accepted;
                 vlapic->ops.set_tmr = vmx_set_tmr;
+                vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode;
         }
 
         if (posted_interrupts)
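The guest_msr_ro() macro introduced at the top of this diff works by flipping per-MSR permission bits in the VMX MSR bitmap. Per the Intel SDM ("MSR-Bitmap Address"), that bitmap is a 4KB page split into four 1KB quadrants (reads of low MSRs, reads of high MSRs, writes of low MSRs, writes of high MSRs); a set bit forces RDMSR/WRMSR to cause a VM-exit, a clear bit lets the guest access the MSR directly. The sketch below is a minimal, self-contained model of how a msr_bitmap_change_access()-style helper could behave under those layout rules; the function name msr_bitmap_set_access and its body are illustrative assumptions, not the FreeBSD implementation.

#include <errno.h>
#include <stdint.h>

#define MSR_BITMAP_ACCESS_NONE   0x0  /* both accesses cause a VM-exit */
#define MSR_BITMAP_ACCESS_READ   0x1  /* guest may RDMSR directly */
#define MSR_BITMAP_ACCESS_WRITE  0x2  /* guest may WRMSR directly */
#define MSR_BITMAP_ACCESS_RW     (MSR_BITMAP_ACCESS_READ | MSR_BITMAP_ACCESS_WRITE)

/*
 * Hypothetical stand-in for msr_bitmap_change_access().  The 4KB bitmap
 * is addressed as follows (Intel SDM vol. 3, "MSR-Bitmap Address"):
 *   bytes 0x000-0x3ff: read bitmap for MSRs 0x00000000-0x00001fff
 *   bytes 0x400-0x7ff: read bitmap for MSRs 0xc0000000-0xc0001fff
 *   bytes 0x800-0xbff: write bitmap for MSRs 0x00000000-0x00001fff
 *   bytes 0xc00-0xfff: write bitmap for MSRs 0xc0000000-0xc0001fff
 * A set bit traps the access; a clear bit passes it through.
 */
static int
msr_bitmap_set_access(uint8_t *bitmap, uint32_t msr, int access)
{
        int byte, bit;

        if (msr <= 0x00001fff)
                byte = msr / 8;
        else if (msr >= 0xc0000000 && msr <= 0xc0001fff)
                byte = 1024 + (msr - 0xc0000000) / 8;
        else
                return (EINVAL);        /* MSR not covered by the bitmap */

        bit = msr & 0x7;

        if (access & MSR_BITMAP_ACCESS_READ)
                bitmap[byte] &= ~(1 << bit);            /* pass reads through */
        else
                bitmap[byte] |= (1 << bit);             /* trap reads */

        if (access & MSR_BITMAP_ACCESS_WRITE)
                bitmap[2048 + byte] &= ~(1 << bit);     /* pass writes through */
        else
                bitmap[2048 + byte] |= (1 << bit);      /* trap writes */

        return (0);
}

Under this model, guest_msr_ro(vmx, MSR_APIC_ID) clears the read bit and sets the write bit for the x2APIC ID register (MSR 0x802), so the guest reads its APIC ID at full speed while any write still traps into the hypervisor, which is exactly the read-only intent of vmx_allow_x2apic_msrs().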