author     neel <neel@FreeBSD.org>  2012-09-25 22:31:35 +0000
committer  neel <neel@FreeBSD.org>  2012-09-25 22:31:35 +0000
commit     bc269b51afe43aab28df7ea0d543c167bb7c7d2e (patch)
tree       bcd31b7f03fe25c622c8a7edf72d8163a4023b8c
parent     ebdd69568d7fa97153aa47a86afe367476a0a1de (diff)
Add support for trapping MMIO writes to local apic registers and emulating them.
The default behavior is still to present the local apic to the guest in x2apic mode.
-rw-r--r--  sys/amd64/vmm/intel/vmcs.h           |  10
-rw-r--r--  sys/amd64/vmm/intel/vmx.c            |  74
-rw-r--r--  sys/amd64/vmm/io/vlapic.c            |  39
-rw-r--r--  sys/amd64/vmm/vmm.c                  |   2
-rw-r--r--  sys/amd64/vmm/vmm_instruction_emul.c | 385
-rw-r--r--  sys/amd64/vmm/vmm_instruction_emul.h |  91
-rw-r--r--  sys/amd64/vmm/vmm_lapic.c            |  71
-rw-r--r--  sys/amd64/vmm/vmm_lapic.h            |   3
-rw-r--r--  sys/amd64/vmm/x86.c                  |  22
-rw-r--r--  sys/amd64/vmm/x86.h                  |   4
-rw-r--r--  sys/modules/vmm/Makefile             |   1
11 files changed, 677 insertions(+), 25 deletions(-)
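
The shape of the change, end to end: a guest load or store to the local apic page raises an EPT violation; the VMX exit handler validates the fault (vmx_lapic_fault), the faulting instruction is fetched and decoded (vmm_instruction_emul.c), and the access is then replayed against the vlapic (lapic_mmio). Below is a minimal user-space sketch of that flow; the names echo the patch, but apic_page and lapic_mmio_sketch are stand-ins invented here, not the kernel implementation.

#include <stdint.h>
#include <stdio.h>

#define DEFAULT_APIC_BASE 0xfee00000UL
#define PAGE_SIZE 4096

/* Stand-in for the vlapic register file: one 4KB page of 32-bit registers. */
static uint32_t apic_page[PAGE_SIZE / 4];

static int
lapic_mmio_sketch(uint64_t gpa, int read, uint32_t *val)
{
	uint64_t offset;

	/* The same legitimacy checks vmx_lapic_fault() applies. */
	if (gpa < DEFAULT_APIC_BASE || gpa >= DEFAULT_APIC_BASE + PAGE_SIZE)
		return (0);		/* not in the local apic page */
	if ((gpa & 0xF) != 0)
		return (0);		/* not 16-byte aligned */

	offset = gpa - DEFAULT_APIC_BASE;
	if (read)
		*val = apic_page[offset / 4];
	else
		apic_page[offset / 4] = *val;
	return (1);
}

int
main(void)
{
	uint32_t val = 0xff;	/* hypothetical TPR value */

	/* Emulate a guest write to the TPR at offset 0x80, then a read. */
	lapic_mmio_sketch(DEFAULT_APIC_BASE + 0x80, 0, &val);
	val = 0;
	if (lapic_mmio_sketch(DEFAULT_APIC_BASE + 0x80, 1, &val))
		printf("TPR = %#x\n", val);
	return (0);
}
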
diff --git a/sys/amd64/vmm/intel/vmcs.h b/sys/amd64/vmm/intel/vmcs.h
index a7cf4f6..84532f4 100644
--- a/sys/amd64/vmm/intel/vmcs.h
+++ b/sys/amd64/vmm/intel/vmcs.h
@@ -66,6 +66,7 @@ uint64_t vmcs_read(uint32_t encoding);
#define vmcs_exit_reason() (vmcs_read(VMCS_EXIT_REASON) & 0xffff)
#define vmcs_exit_qualification() vmcs_read(VMCS_EXIT_QUALIFICATION)
#define vmcs_guest_cr3() vmcs_read(VMCS_GUEST_CR3)
+#define vmcs_gpa() vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)
#endif /* _KERNEL */
@@ -324,4 +325,13 @@ uint64_t vmcs_read(uint32_t encoding);
*/
#define EXIT_QUAL_NMI_WHILE_STI_BLOCKING 3
+/*
+ * Exit qualification for EPT violation
+ */
+#define EPT_VIOLATION_DATA_READ (1UL << 0)
+#define EPT_VIOLATION_DATA_WRITE (1UL << 1)
+#define EPT_VIOLATION_INST_FETCH (1UL << 2)
+#define EPT_VIOLATION_GLA_VALID (1UL << 7)
+#define EPT_VIOLATION_XLAT_VALID (1UL << 8)
+
#endif
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 6689013..ed0996e 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -48,6 +48,8 @@ __FBSDID("$FreeBSD$");
#include <machine/specialreg.h>
#include <machine/vmparam.h>
+#include <x86/apicreg.h>
+
#include <machine/vmm.h>
#include "vmm_lapic.h"
#include "vmm_msr.h"
@@ -60,6 +62,7 @@ __FBSDID("$FreeBSD$");
#include "vmx.h"
#include "x86.h"
#include "vmx_controls.h"
+#include "vmm_instruction_emul.h"
#define CR4_VMXE (1UL << 13)
@@ -771,21 +774,17 @@ vmx_vminit(struct vm *vm)
}
static int
-vmx_handle_cpuid(int vcpu, struct vmxctx *vmxctx)
+vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx)
{
int handled, func;
func = vmxctx->guest_rax;
- handled = x86_emulate_cpuid((uint32_t*)(&vmxctx->guest_rax),
- (uint32_t*)(&vmxctx->guest_rbx), (uint32_t*)(&vmxctx->guest_rcx),
- (uint32_t*)(&vmxctx->guest_rdx), vcpu);
-#if 0
- printf("%s: func %x rax %lx rbx %lx rcx %lx rdx %lx handled %d\n",
- __func__, func, vmxctx->guest_rax, vmxctx->guest_rbx,
- vmxctx->guest_rcx, vmxctx->guest_rdx, handled);
-#endif
-
+ handled = x86_emulate_cpuid(vm, vcpu,
+ (uint32_t*)(&vmxctx->guest_rax),
+ (uint32_t*)(&vmxctx->guest_rbx),
+ (uint32_t*)(&vmxctx->guest_rcx),
+ (uint32_t*)(&vmxctx->guest_rdx));
return (handled);
}
@@ -1146,13 +1145,54 @@ vmx_emulate_cr_access(struct vmx *vmx, int vcpu, uint64_t exitqual)
}
static int
+vmx_lapic_fault(struct vm *vm, int cpu,
+ uint64_t gpa, uint64_t rip, uint64_t cr3, uint64_t ept_qual)
+{
+ int read, write, handled;
+
+ /*
+ * For this to be a legitimate access to the local apic:
+ * - the GPA must be in the local apic page
+ * - the GPA must be aligned on a 16 byte boundary
+ */
+ if (gpa < DEFAULT_APIC_BASE || gpa >= DEFAULT_APIC_BASE + PAGE_SIZE)
+ return (UNHANDLED);
+
+ if ((gpa & 0xF) != 0)
+ return (UNHANDLED);
+
+ /* EPT violation on an instruction fetch doesn't make sense here */
+ if (ept_qual & EPT_VIOLATION_INST_FETCH)
+ return (UNHANDLED);
+
+ /* EPT violation must be a read fault or a write fault but not both */
+ read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
+ write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
+ if ((read ^ write) == 0)
+ return (UNHANDLED);
+
+ /*
+ * The EPT violation must have been caused by accessing a guest-physical
+ * address that is a translation of a guest-linear address.
+ */
+ if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
+ (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
+ return (UNHANDLED);
+ }
+
+ handled = lapic_mmio(vm, cpu, gpa - DEFAULT_APIC_BASE, read, rip, cr3);
+
+ return (handled);
+}
+
+static int
vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
{
int handled;
struct vmcs *vmcs;
struct vmxctx *vmxctx;
uint32_t eax, ecx, edx;
- uint64_t qual;
+ uint64_t qual, gpa, cr3;
handled = 0;
vmcs = &vmx->vmcs[vcpu];
@@ -1229,11 +1269,17 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
vmexit->u.inout.eax = (uint32_t)(vmxctx->guest_rax);
break;
case EXIT_REASON_CPUID:
- handled = vmx_handle_cpuid(vcpu, vmxctx);
+ handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx);
break;
case EXIT_REASON_EPT_FAULT:
- vmexit->exitcode = VM_EXITCODE_PAGING;
- vmexit->u.paging.cr3 = vmcs_guest_cr3();
+ gpa = vmcs_gpa();
+ cr3 = vmcs_guest_cr3();
+ handled = vmx_lapic_fault(vmx->vm, vcpu,
+ gpa, vmexit->rip, cr3, qual);
+ if (!handled) {
+ vmexit->exitcode = VM_EXITCODE_PAGING;
+ vmexit->u.paging.cr3 = cr3;
+ }
break;
default:
break;
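
A sketch of the exit-qualification filtering that vmx_lapic_fault() performs above. The EPT_VIOLATION_* masks are copied from the vmcs.h hunk; the qualification values fed in from main() are invented for illustration.

#include <stdint.h>
#include <stdio.h>

#define EPT_VIOLATION_DATA_READ		(1UL << 0)
#define EPT_VIOLATION_DATA_WRITE	(1UL << 1)
#define EPT_VIOLATION_INST_FETCH	(1UL << 2)
#define EPT_VIOLATION_GLA_VALID		(1UL << 7)
#define EPT_VIOLATION_XLAT_VALID	(1UL << 8)

static int
qual_is_emulatable(uint64_t qual)
{
	int read, write;

	/* An instruction fetch from MMIO space is never emulated. */
	if (qual & EPT_VIOLATION_INST_FETCH)
		return (0);

	/* Exactly one of read/write must be set. */
	read = (qual & EPT_VIOLATION_DATA_READ) ? 1 : 0;
	write = (qual & EPT_VIOLATION_DATA_WRITE) ? 1 : 0;
	if ((read ^ write) == 0)
		return (0);

	/* The fault must come from a guest-linear-address translation. */
	if ((qual & EPT_VIOLATION_GLA_VALID) == 0 ||
	    (qual & EPT_VIOLATION_XLAT_VALID) == 0)
		return (0);

	return (1);
}

int
main(void)
{
	/* A write fault during a translation: emulatable (prints 1). */
	printf("%d\n", qual_is_emulatable(EPT_VIOLATION_DATA_WRITE |
	    EPT_VIOLATION_GLA_VALID | EPT_VIOLATION_XLAT_VALID));
	/* An instruction fetch: rejected (prints 0). */
	printf("%d\n", qual_is_emulatable(EPT_VIOLATION_INST_FETCH));
	return (0);
}
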
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
index 9b7d3cb..aedc692 100644
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -87,7 +87,7 @@ static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic");
#define VLAPIC_VERSION (16)
#define VLAPIC_MAXLVT_ENTRIES (5)
-#define x2apic(vlapic) ((vlapic)->msr_apicbase & APICBASE_X2APIC)
+#define x2apic(vlapic) (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)
enum boot_state {
BS_INIT,
@@ -433,7 +433,10 @@ lapic_process_icr(struct vlapic *vlapic, uint64_t icrval)
struct vlapic *vlapic2;
struct vm_exit *vmexit;
- dest = icrval >> 32;
+ if (x2apic(vlapic))
+ dest = icrval >> 32;
+ else
+ dest = icrval >> (32 + 24);
vec = icrval & APIC_VECTOR_MASK;
mode = icrval & APIC_DELMODE_MASK;
@@ -703,8 +706,18 @@ vlapic_op_mem_write(void* dev, uint64_t gpa, opsize_t size, uint64_t data)
lapic->svr = data;
break;
case APIC_OFFSET_ICR_LOW:
+ if (!x2apic(vlapic)) {
+ data &= 0xffffffff;
+ data |= (uint64_t)lapic->icr_hi << 32;
+ }
retval = lapic_process_icr(vlapic, data);
break;
+ case APIC_OFFSET_ICR_HI:
+ if (!x2apic(vlapic)) {
+ retval = 0;
+ lapic->icr_hi = data;
+ }
+ break;
case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
reg = vlapic_get_lvt(vlapic, offset);
if (!(lapic->svr & APIC_SVR_ENABLE)) {
@@ -810,19 +823,26 @@ static struct io_region vlapic_mmio[VM_MAXCPU];
struct vlapic *
vlapic_init(struct vm *vm, int vcpuid)
{
+ int err;
+ enum x2apic_state state;
struct vlapic *vlapic;
+ err = vm_get_x2apic_state(vm, vcpuid, &state);
+ if (err)
+ panic("vlapic_init: err %d fetching x2apic state", err);
+
vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO);
vlapic->vm = vm;
vlapic->vcpuid = vcpuid;
- vlapic->msr_apicbase = DEFAULT_APIC_BASE |
- APICBASE_ENABLED |
- APICBASE_X2APIC;
+ vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
if (vcpuid == 0)
vlapic->msr_apicbase |= APICBASE_BSP;
+ if (state == X2APIC_ENABLED)
+ vlapic->msr_apicbase |= APICBASE_X2APIC;
+
vlapic->ops = &vlapic_dev_ops;
vlapic->mmio = vlapic_mmio + vcpuid;
@@ -856,6 +876,15 @@ vlapic_get_apicbase(struct vlapic *vlapic)
void
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
{
+ int err;
+ enum x2apic_state state;
+
+ err = vm_get_x2apic_state(vlapic->vm, vlapic->vcpuid, &state);
+ if (err)
+ panic("vlapic_set_apicbase: err %d fetching x2apic state", err);
+
+ if (state == X2APIC_DISABLED)
+ val &= ~APICBASE_X2APIC;
vlapic->msr_apicbase = val;
}
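
The ICR change above is the xAPIC/x2apic split in a nutshell: in x2apic mode the destination is the full upper 32 bits of the 64-bit ICR, while in xAPIC mode it is only bits 56:63, i.e. the top byte of the old ICR_HI register (hence the new ICR_HI latch in vlapic_op_mem_write). A standalone sketch of just the destination extraction, with a made-up ICR value:

#include <stdint.h>
#include <stdio.h>

static uint32_t
icr_dest(uint64_t icrval, int x2apic)
{
	if (x2apic)
		return (icrval >> 32);		/* full 32-bit destination */
	else
		return (icrval >> (32 + 24));	/* bits 56:63 of the ICR */
}

int
main(void)
{
	/* Hypothetical ICR: destination 0x02, vector 0x30 in the low byte. */
	uint64_t icr = (0x02UL << 56) | 0x0030;

	printf("xAPIC dest %#x, x2APIC dest %#x\n",
	    icr_dest(icr, 0), icr_dest(icr, 1));
	return (0);
}
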
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 29dbe67..764ffbb 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -160,11 +160,11 @@ vcpu_init(struct vm *vm, uint32_t vcpu_id)
vcpu->hostcpu = -1;
vcpu->vcpuid = vcpu_id;
+ vcpu->x2apic_state = X2APIC_ENABLED;
vcpu->vlapic = vlapic_init(vm, vcpu_id);
vcpu->guestfpu = fpu_save_area_alloc();
fpu_save_area_reset(vcpu->guestfpu);
vcpu->stats = vmm_stat_alloc();
- vcpu->x2apic_state = X2APIC_ENABLED;
}
struct vm_exit *
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
new file mode 100644
index 0000000..fe01d69
--- /dev/null
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -0,0 +1,385 @@
+/*-
+ * Copyright (c) 2012 Sandvine, Inc.
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+
+#include "vmm_instruction_emul.h"
+
+#define GB (1024 * 1024 * 1024)
+
+static enum vm_reg_name gpr_map[16] = {
+ VM_REG_GUEST_RAX,
+ VM_REG_GUEST_RCX,
+ VM_REG_GUEST_RDX,
+ VM_REG_GUEST_RBX,
+ VM_REG_GUEST_RSP,
+ VM_REG_GUEST_RBP,
+ VM_REG_GUEST_RSI,
+ VM_REG_GUEST_RDI,
+ VM_REG_GUEST_R8,
+ VM_REG_GUEST_R9,
+ VM_REG_GUEST_R10,
+ VM_REG_GUEST_R11,
+ VM_REG_GUEST_R12,
+ VM_REG_GUEST_R13,
+ VM_REG_GUEST_R14,
+ VM_REG_GUEST_R15
+};
+
+static void
+vie_init(struct vie *vie)
+{
+
+ bzero(vie, sizeof(struct vie));
+
+ vie->op_size = VIE_OP_SIZE_32BIT;
+
+ vie->base_register = VM_REG_LAST;
+ vie->index_register = VM_REG_LAST;
+ vie->operand_register = VM_REG_LAST;
+}
+
+static int
+gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
+ uint64_t *gpa, uint64_t *gpaend)
+{
+ vm_paddr_t hpa;
+ int nlevels, ptpshift, ptpindex;
+ uint64_t *ptpbase, pte, pgsize;
+
+ /*
+ * XXX assumes 64-bit guest with 4 page walk levels
+ */
+ nlevels = 4;
+ while (--nlevels >= 0) {
+ /* Zero out the lower 12 bits and the upper 12 bits */
+ ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;
+
+ hpa = vm_gpa2hpa(vm, ptpphys, PAGE_SIZE);
+ if (hpa == -1)
+ goto error;
+
+ ptpbase = (uint64_t *)PHYS_TO_DMAP(hpa);
+
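+ /*
+ * Each page-table page resolves 9 bits of the guest linear address:
+ * bits 39:47 (PML4), 30:38 (PDP), 21:29 (PD) and 12:20 (PT).
+ */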
+ ptpshift = PAGE_SHIFT + nlevels * 9;
+ ptpindex = (gla >> ptpshift) & 0x1FF;
+ pgsize = 1UL << ptpshift;
+
+ pte = ptpbase[ptpindex];
+
+ if ((pte & PG_V) == 0)
+ goto error;
+
+ if (pte & PG_PS) {
+ if (pgsize > 1 * GB)
+ goto error;
+ else
+ break;
+ }
+
+ ptpphys = pte;
+ }
+
+ /* Zero out the lower 'ptpshift' bits and the upper 12 bits */
+ pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
+ *gpa = pte | (gla & (pgsize - 1));
+ *gpaend = pte + pgsize;
+ return (0);
+
+error:
+ return (-1);
+}
+
+void
+vmm_fetch_instruction(struct vm *vm, uint64_t rip, uint64_t cr3,
+ struct vie *vie)
+{
+ int n, err;
+ uint64_t hpa, gpa, gpaend;
+
+ /*
+ * XXX cache previously fetched instructions using 'rip' as the tag
+ */
+
+ vie_init(vie);
+
+ /*
+ * Copy up to 15 bytes of the instruction stream into 'vie'
+ */
+ while (vie->num_valid < VIE_INST_SIZE) {
+ err = gla2gpa(vm, rip, cr3, &gpa, &gpaend);
+ if (err)
+ break;
+
+ n = min(VIE_INST_SIZE - vie->num_valid, gpaend - gpa);
+
+ hpa = vm_gpa2hpa(vm, gpa, n);
+ if (hpa == -1)
+ break;
+
+ bcopy((void *)PHYS_TO_DMAP(hpa), &vie->inst[vie->num_valid], n);
+
+ rip += n;
+ vie->num_valid += n;
+ }
+}
+
+static int
+vie_peek(struct vie *vie, uint8_t *x)
+{
+ if (vie->num_processed < vie->num_valid) {
+ *x = vie->inst[vie->num_processed];
+ return (0);
+ } else
+ return (-1);
+}
+
+static void
+vie_advance(struct vie *vie)
+{
+ if (vie->num_processed >= vie->num_valid)
+ panic("vie_advance: %d/%d", vie->num_processed, vie->num_valid);
+
+ vie->num_processed++;
+}
+
+static int
+decode_rex(struct vie *vie)
+{
+ uint8_t x;
+
+ if (vie_peek(vie, &x))
+ return (-1);
+
+ if (x >= 0x40 && x <= 0x4F) {
+ vie->rex_w = x & 0x8 ? 1 : 0;
+ vie->rex_r = x & 0x4 ? 1 : 0;
+ vie->rex_x = x & 0x2 ? 1 : 0;
+ vie->rex_b = x & 0x1 ? 1 : 0;
+
+ vie_advance(vie);
+ }
+
+ return (0);
+}
+
+static int
+decode_opcode(struct vie *vie)
+{
+ uint8_t x;
+
+ static const uint8_t flags[256] = {
+ [0x89] = VIE_F_HAS_MODRM | VIE_F_FROM_REG | VIE_F_TO_RM, /* MOV r/m32, r32 */
+ [0x8B] = VIE_F_HAS_MODRM | VIE_F_FROM_RM | VIE_F_TO_REG, /* MOV r32, r/m32 */
+ [0xC7] = VIE_F_HAS_MODRM | VIE_F_FROM_IMM | VIE_F_TO_RM, /* MOV r/m32, imm32 */
+ };
+
+ if (vie_peek(vie, &x))
+ return (-1);
+
+ vie->opcode_byte = x;
+ vie->opcode_flags = flags[x];
+
+ vie_advance(vie);
+
+ if (vie->opcode_flags == 0)
+ return (-1);
+ else
+ return (0);
+}
+
+/*
+ * XXX assuming 32-bit or 64-bit guest
+ */
+static int
+decode_modrm(struct vie *vie)
+{
+ uint8_t x;
+
+ if ((vie->opcode_flags & VIE_F_HAS_MODRM) == 0)
+ return (0);
+
+ if (vie_peek(vie, &x))
+ return (-1);
+
+ vie->mod = (x >> 6) & 0x3;
+ vie->rm = (x >> 0) & 0x7;
+ vie->reg = (x >> 3) & 0x7;
+
+ if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
+ (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
+ /*
+ * Table 2-5: Special Cases of REX Encodings
+ *
+ * mod=0, r/m=5 is used in the compatibility mode to
+ * indicate a disp32 without a base register.
+ *
+ * mod!=3, r/m=4 is used in the compatibility mode to
+ * indicate that the SIB byte is present.
+ *
+ * The 'b' bit in the REX prefix is don't care in
+ * this case.
+ */
+ } else {
+ vie->rm |= (vie->rex_b << 3);
+ }
+
+ vie->reg |= (vie->rex_r << 3);
+
+ /* SIB addressing not supported yet */
+ if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
+ return (-1);
+
+ vie->base_register = gpr_map[vie->rm];
+
+ if (vie->opcode_flags & (VIE_F_FROM_REG | VIE_F_TO_REG))
+ vie->operand_register = gpr_map[vie->reg];
+
+ switch (vie->mod) {
+ case VIE_MOD_INDIRECT_DISP8:
+ vie->disp_bytes = 1;
+ break;
+ case VIE_MOD_INDIRECT_DISP32:
+ vie->disp_bytes = 4;
+ break;
+ case VIE_MOD_INDIRECT:
+ if (vie->rm == VIE_RM_DISP32) {
+ vie->disp_bytes = 4;
+ vie->base_register = VM_REG_LAST; /* no base */
+ }
+ break;
+ }
+
+ /* calculate the operand size */
+ if (vie->rex_w)
+ vie->op_size = VIE_OP_SIZE_64BIT;
+
+ if (vie->opcode_flags & VIE_F_FROM_IMM)
+ vie->imm_bytes = 4;
+
+ vie_advance(vie);
+
+ return (0);
+}
+
+static int
+decode_displacement(struct vie *vie)
+{
+ int n, i;
+ uint8_t x;
+
+ union {
+ char buf[4];
+ int8_t signed8;
+ int32_t signed32;
+ } u;
+
+ if ((n = vie->disp_bytes) == 0)
+ return (0);
+
+ if (n != 1 && n != 4)
+ panic("decode_displacement: invalid disp_bytes %d", n);
+
+ for (i = 0; i < n; i++) {
+ if (vie_peek(vie, &x))
+ return (-1);
+
+ u.buf[i] = x;
+ vie_advance(vie);
+ }
+
+ if (n == 1)
+ vie->displacement = u.signed8; /* sign-extended */
+ else
+ vie->displacement = u.signed32; /* sign-extended */
+
+ return (0);
+}
+
+static int
+decode_immediate(struct vie *vie)
+{
+ int i, n;
+ uint8_t x;
+ union {
+ char buf[4];
+ int32_t signed32;
+ } u;
+
+ if ((n = vie->imm_bytes) == 0)
+ return (0);
+
+ if (n != 4)
+ panic("decode_immediate: invalid imm_bytes %d", n);
+
+ for (i = 0; i < n; i++) {
+ if (vie_peek(vie, &x))
+ return (-1);
+
+ u.buf[i] = x;
+ vie_advance(vie);
+ }
+
+ vie->immediate = u.signed32; /* sign-extended */
+
+ return (0);
+}
+
+int
+vmm_decode_instruction(struct vie *vie)
+{
+ if (decode_rex(vie))
+ return (-1);
+
+ if (decode_opcode(vie))
+ return (-1);
+
+ if (decode_modrm(vie))
+ return (-1);
+
+ if (decode_displacement(vie))
+ return (-1);
+
+ if (decode_immediate(vie))
+ return (-1);
+
+ return (0);
+}
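
To make the decoder concrete, here is a hand-decode of "mov %rdx,0x300(%rax)" (bytes 48 89 90 00 03 00 00), one of the forms the 0x89 table entry covers. This is an illustrative standalone sketch that follows the same REX/opcode/ModRM/displacement steps as vmm_decode_instruction(), not the vie code itself.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const uint8_t inst[] = { 0x48, 0x89, 0x90, 0x00, 0x03, 0x00, 0x00 };
	int i = 0, rex_w = 0, rex_r = 0, rex_b = 0;
	unsigned int mod, reg, rm;
	int32_t disp;

	/* REX prefix, if present, is a byte in the range 0x40-0x4F. */
	if (inst[i] >= 0x40 && inst[i] <= 0x4F) {
		rex_w = (inst[i] >> 3) & 1;	/* 64-bit operand size */
		rex_r = (inst[i] >> 2) & 1;	/* extends ModRM.reg */
		rex_b = inst[i] & 1;		/* extends ModRM.rm */
		i++;
	}

	/* Opcode 0x89: MOV r/m, r (register into memory). */
	printf("opcode %#x\n", inst[i++]);

	/* ModRM 0x90: mod=2 (disp32), reg=2 (%rdx), rm=0 (%rax). */
	mod = (inst[i] >> 6) & 0x3;
	reg = ((inst[i] >> 3) & 0x7) | (rex_r << 3);
	rm = (inst[i] & 0x7) | (rex_b << 3);
	i++;

	memcpy(&disp, &inst[i], 4);	/* little-endian disp32 = 0x300 */

	printf("op size %d-bit, mod %u, reg %u, rm %u, disp %#x\n",
	    rex_w ? 64 : 32, mod, reg, rm, disp);
	return (0);
}
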
diff --git a/sys/amd64/vmm/vmm_instruction_emul.h b/sys/amd64/vmm/vmm_instruction_emul.h
new file mode 100644
index 0000000..94937f2
--- /dev/null
+++ b/sys/amd64/vmm/vmm_instruction_emul.h
@@ -0,0 +1,91 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_INSTRUCTION_EMUL_H_
+#define _VMM_INSTRUCTION_EMUL_H_
+
+enum vie_op_size {
+ VIE_OP_SIZE_32BIT, /* default */
+ VIE_OP_SIZE_64BIT,
+ VIE_OP_SIZE_8BIT
+};
+
+#define VIE_INST_SIZE 15
+struct vie {
+ uint8_t inst[VIE_INST_SIZE];
+
+ uint8_t rex_w:1,
+ rex_r:1,
+ rex_x:1,
+ rex_b:1;
+
+ uint8_t mod:2,
+ reg:4,
+ rm:4;
+
+
+ uint8_t opcode_byte;
+ uint16_t opcode_flags;
+ uint8_t disp_bytes;
+ uint8_t imm_bytes;
+
+ int num_valid;
+ int num_processed;
+
+ enum vm_reg_name base_register;
+ enum vm_reg_name index_register;
+ enum vm_reg_name operand_register;
+
+ int op_size;
+ int64_t displacement;
+ int64_t immediate;
+};
+
+#define VIE_F_HAS_MODRM (1 << 0)
+#define VIE_F_FROM_RM (1 << 1)
+#define VIE_F_FROM_REG (1 << 2)
+#define VIE_F_TO_RM (1 << 3)
+#define VIE_F_TO_REG (1 << 4)
+#define VIE_F_FROM_IMM (1 << 5)
+
+#define VIE_MOD_INDIRECT 0
+#define VIE_MOD_INDIRECT_DISP8 1
+#define VIE_MOD_INDIRECT_DISP32 2
+#define VIE_MOD_DIRECT 3
+
+#define VIE_RM_SIB 4
+#define VIE_RM_DISP32 5
+
+struct vm;
+
+void vmm_fetch_instruction(struct vm *vm, uint64_t rip, uint64_t cr3,
+ struct vie *vie);
+
+int vmm_decode_instruction(struct vie *vie);
+
+#endif
diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c
index 13550b4..0d797e6 100644
--- a/sys/amd64/vmm/vmm_lapic.c
+++ b/sys/amd64/vmm/vmm_lapic.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include "vmm_ipi.h"
#include "vmm_lapic.h"
#include "vlapic.h"
+#include "vmm_instruction_emul.h"
static int
lapic_write(struct vlapic *vlapic, u_int offset, uint64_t val)
@@ -174,3 +175,73 @@ lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val)
return (handled);
}
+
+int
+lapic_mmio(struct vm *vm, int cpu, u_int offset, int read,
+ uint64_t rip, uint64_t cr3)
+{
+ int handled, error;
+ uint64_t val;
+ struct vie vie;
+ struct vlapic *vlapic;
+
+ const int UNHANDLED = 0;
+
+ vlapic = vm_lapic(vm, cpu);
+
+ vmm_fetch_instruction(vm, rip, cr3, &vie);
+
+ if (vmm_decode_instruction(&vie) != 0)
+ return (UNHANDLED);
+
+ /* Only 32-bit accesses to local apic */
+ if (vie.op_size != VIE_OP_SIZE_32BIT)
+ return (UNHANDLED);
+
+ /*
+ * XXX
+ * The operand register in which we store the result of the
+ * read must be a GPR that we can modify even if the vcpu
+ * is "running". All the GPRs qualify except for %rsp.
+ *
+ * This is a limitation of the vm_set_register() API
+ * and can be fixed if necessary.
+ */
+ if (vie.operand_register == VM_REG_GUEST_RSP)
+ return (UNHANDLED);
+
+ if (read) {
+ if ((vie.opcode_flags & VIE_F_TO_REG) == 0)
+ return (UNHANDLED);
+
+ if (vie.operand_register >= VM_REG_LAST)
+ return (UNHANDLED);
+
+ handled = lapic_read(vlapic, offset, &val);
+ if (handled) {
+ error = vm_set_register(vm, cpu, vie.operand_register,
+ val);
+ if (error)
+ panic("lapic_mmio: error %d setting gpr %d",
+ error, vie.operand_register);
+ }
+ } else {
+ if ((vie.opcode_flags & VIE_F_FROM_REG) &&
+ (vie.operand_register < VM_REG_LAST)) {
+ error = vm_get_register(vm, cpu, vie.operand_register,
+ &val);
+ if (error) {
+ panic("lapic_mmio: error %d getting gpr %d",
+ error, vie.operand_register);
+ }
+ } else if (vie.opcode_flags & VIE_F_FROM_IMM) {
+ val = vie.immediate;
+ } else {
+ return (UNHANDLED);
+ }
+
+ handled = lapic_write(vlapic, offset, val);
+ }
+
+ return (handled);
+}
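
The dispatch above is the heart of the emulation: a read must land in a modifiable GPR, while a write sources its value from either a GPR or an immediate. A standalone sketch of just that source dispatch; the flag values are copied from vmm_instruction_emul.h, and struct vie_sketch is a hand-built stand-in for struct vie.

#include <stdint.h>
#include <stdio.h>

#define VIE_F_FROM_RM	(1 << 1)
#define VIE_F_FROM_REG	(1 << 2)
#define VIE_F_TO_RM	(1 << 3)
#define VIE_F_TO_REG	(1 << 4)
#define VIE_F_FROM_IMM	(1 << 5)

struct vie_sketch {
	uint16_t opcode_flags;
	int64_t immediate;
};

static int
mmio_write_source(const struct vie_sketch *vie, uint64_t gpr, uint64_t *val)
{
	if (vie->opcode_flags & VIE_F_FROM_REG)
		*val = gpr;			/* e.g. mov %esi,(apic) */
	else if (vie->opcode_flags & VIE_F_FROM_IMM)
		*val = vie->immediate;		/* e.g. movl $0x1f,(apic) */
	else
		return (-1);			/* unhandled form */
	return (0);
}

int
main(void)
{
	struct vie_sketch mov_imm = { VIE_F_FROM_IMM | VIE_F_TO_RM, 0x1f };
	uint64_t val;

	if (mmio_write_source(&mov_imm, 0, &val) == 0)
		printf("write value %#lx\n", (unsigned long)val);
	return (0);
}
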
diff --git a/sys/amd64/vmm/vmm_lapic.h b/sys/amd64/vmm/vmm_lapic.h
index 60f7696..7bba4e3 100644
--- a/sys/amd64/vmm/vmm_lapic.h
+++ b/sys/amd64/vmm/vmm_lapic.h
@@ -35,6 +35,9 @@ boolean_t lapic_msr(u_int num);
int lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval);
int lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval);
+int lapic_mmio(struct vm *vm, int cpu, u_int offset, int read,
+ uint64_t rip, uint64_t cr3);
+
void lapic_timer_tick(struct vm *vm, int cpu);
/*
diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c
index 669fa4b..47ba975 100644
--- a/sys/amd64/vmm/x86.c
+++ b/sys/amd64/vmm/x86.c
@@ -29,13 +29,17 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
+#include <sys/cpuset.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
+#include <machine/vmm.h>
+
#include "x86.h"
#define CPUID_VM_HIGH 0x40000000
@@ -43,10 +47,12 @@ __FBSDID("$FreeBSD$");
static const char bhyve_id[12] = "BHyVE BHyVE ";
int
-x86_emulate_cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx,
- uint32_t vcpu_id)
+x86_emulate_cpuid(struct vm *vm, int vcpu_id,
+ uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
+ int error;
unsigned int func, regs[4];
+ enum x2apic_state x2apic_state;
func = *eax;
@@ -91,6 +97,12 @@ x86_emulate_cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx,
case CPUID_0000_0001:
do_cpuid(1, regs);
+ error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
+ if (error) {
+ panic("x86_emulate_cpuid: error %d "
+ "fetching x2apic state", error);
+ }
+
/*
* Override the APIC ID only in ebx
*/
@@ -102,7 +114,11 @@ x86_emulate_cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx,
* Advertise x2APIC capability and Hypervisor guest.
*/
regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
- regs[2] |= CPUID2_X2APIC | CPUID2_HV;
+
+ regs[2] |= CPUID2_HV;
+
+ if (x2apic_state != X2APIC_DISABLED)
+ regs[2] |= CPUID2_X2APIC;
/*
* Hide xsave/osxsave/avx until the FPU save/restore
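
The net effect of the CPUID change: the guest sees the hypervisor bit unconditionally, and the x2APIC capability bit only when x2apic is enabled for that vcpu. A toy version of the ECX masking follows; bit positions are per the Intel SDM (ECX bit 21 = x2APIC, bit 31 = hypervisor), and clearing the bit up front is this sketch's simplification, since the patch starts from the host's real CPUID leaf.

#include <stdint.h>
#include <stdio.h>

#define CPUID2_X2APIC	(1U << 21)
#define CPUID2_HV	(1U << 31)

enum x2apic_state { X2APIC_DISABLED, X2APIC_ENABLED };

static uint32_t
cpuid_ecx_for_guest(uint32_t host_ecx, enum x2apic_state state)
{
	uint32_t ecx = host_ecx & ~CPUID2_X2APIC;	/* clean slate */

	ecx |= CPUID2_HV;		/* always advertise the hypervisor */
	if (state != X2APIC_DISABLED)
		ecx |= CPUID2_X2APIC;	/* expose x2APIC only if enabled */
	return (ecx);
}

int
main(void)
{
	printf("%#x\n", cpuid_ecx_for_guest(0, X2APIC_ENABLED));
	printf("%#x\n", cpuid_ecx_for_guest(CPUID2_X2APIC, X2APIC_DISABLED));
	return (0);
}
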
diff --git a/sys/amd64/vmm/x86.h b/sys/amd64/vmm/x86.h
index d672831..d19e1d8 100644
--- a/sys/amd64/vmm/x86.h
+++ b/sys/amd64/vmm/x86.h
@@ -57,7 +57,7 @@
*/
#define CPUID_0000_0001_FEAT0_VMX (1<<5)
-int x86_emulate_cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
- uint32_t *edx, uint32_t vcpu_id);
+int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx);
#endif
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
index eda6fb0..af93567 100644
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -13,6 +13,7 @@ CFLAGS+= -I${.CURDIR}/../../amd64/vmm/intel
.PATH: ${.CURDIR}/../../amd64/vmm
SRCS+= vmm.c \
vmm_dev.c \
+ vmm_instruction_emul.c \
vmm_ipi.c \
vmm_lapic.c \
vmm_mem.c \