diff options
-rw-r--r-- | sys/amd64/vmm/intel/vmcs.h | 10 | ||||
-rw-r--r-- | sys/amd64/vmm/intel/vmx.c | 74 | ||||
-rw-r--r-- | sys/amd64/vmm/io/vlapic.c | 39 | ||||
-rw-r--r-- | sys/amd64/vmm/vmm.c | 2 | ||||
-rw-r--r-- | sys/amd64/vmm/vmm_instruction_emul.c | 385 | ||||
-rw-r--r-- | sys/amd64/vmm/vmm_instruction_emul.h | 91 | ||||
-rw-r--r-- | sys/amd64/vmm/vmm_lapic.c | 71 | ||||
-rw-r--r-- | sys/amd64/vmm/vmm_lapic.h | 3 | ||||
-rw-r--r-- | sys/amd64/vmm/x86.c | 22 | ||||
-rw-r--r-- | sys/amd64/vmm/x86.h | 4 | ||||
-rw-r--r-- | sys/modules/vmm/Makefile | 1 |
11 files changed, 677 insertions, 25 deletions
diff --git a/sys/amd64/vmm/intel/vmcs.h b/sys/amd64/vmm/intel/vmcs.h index a7cf4f6..84532f4 100644 --- a/sys/amd64/vmm/intel/vmcs.h +++ b/sys/amd64/vmm/intel/vmcs.h @@ -66,6 +66,7 @@ uint64_t vmcs_read(uint32_t encoding); #define vmcs_exit_reason() (vmcs_read(VMCS_EXIT_REASON) & 0xffff) #define vmcs_exit_qualification() vmcs_read(VMCS_EXIT_QUALIFICATION) #define vmcs_guest_cr3() vmcs_read(VMCS_GUEST_CR3) +#define vmcs_gpa() vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS) #endif /* _KERNEL */ @@ -324,4 +325,13 @@ uint64_t vmcs_read(uint32_t encoding); */ #define EXIT_QUAL_NMI_WHILE_STI_BLOCKING 3 +/* + * Exit qualification for EPT violation + */ +#define EPT_VIOLATION_DATA_READ (1UL << 0) +#define EPT_VIOLATION_DATA_WRITE (1UL << 1) +#define EPT_VIOLATION_INST_FETCH (1UL << 2) +#define EPT_VIOLATION_GLA_VALID (1UL << 7) +#define EPT_VIOLATION_XLAT_VALID (1UL << 8) + #endif diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index 6689013..ed0996e 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -48,6 +48,8 @@ __FBSDID("$FreeBSD$"); #include <machine/specialreg.h> #include <machine/vmparam.h> +#include <x86/apicreg.h> + #include <machine/vmm.h> #include "vmm_lapic.h" #include "vmm_msr.h" @@ -60,6 +62,7 @@ __FBSDID("$FreeBSD$"); #include "vmx.h" #include "x86.h" #include "vmx_controls.h" +#include "vmm_instruction_emul.h" #define CR4_VMXE (1UL << 13) @@ -771,21 +774,17 @@ vmx_vminit(struct vm *vm) } static int -vmx_handle_cpuid(int vcpu, struct vmxctx *vmxctx) +vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx) { int handled, func; func = vmxctx->guest_rax; - handled = x86_emulate_cpuid((uint32_t*)(&vmxctx->guest_rax), - (uint32_t*)(&vmxctx->guest_rbx), (uint32_t*)(&vmxctx->guest_rcx), - (uint32_t*)(&vmxctx->guest_rdx), vcpu); -#if 0 - printf("%s: func %x rax %lx rbx %lx rcx %lx rdx %lx handled %d\n", - __func__, func, vmxctx->guest_rax, vmxctx->guest_rbx, - vmxctx->guest_rcx, vmxctx->guest_rdx, handled); -#endif - + 
handled = x86_emulate_cpuid(vm, vcpu, + (uint32_t*)(&vmxctx->guest_rax), + (uint32_t*)(&vmxctx->guest_rbx), + (uint32_t*)(&vmxctx->guest_rcx), + (uint32_t*)(&vmxctx->guest_rdx)); return (handled); } @@ -1146,13 +1145,54 @@ vmx_emulate_cr_access(struct vmx *vmx, int vcpu, uint64_t exitqual) } static int +vmx_lapic_fault(struct vm *vm, int cpu, + uint64_t gpa, uint64_t rip, uint64_t cr3, uint64_t ept_qual) +{ + int read, write, handled; + + /* + * For this to be a legitimate access to the local apic: + * - the GPA must be in the local apic page + * - the GPA must be aligned on a 16 byte boundary + */ + if (gpa < DEFAULT_APIC_BASE || gpa >= DEFAULT_APIC_BASE + PAGE_SIZE) + return (UNHANDLED); + + if ((gpa & 0xF) != 0) + return (UNHANDLED); + + /* EPT violation on an instruction fetch doesn't make sense here */ + if (ept_qual & EPT_VIOLATION_INST_FETCH) + return (UNHANDLED); + + /* EPT violation must be a read fault or a write fault but not both */ + read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0; + write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0; + if ((read ^ write) == 0) + return (UNHANDLED); + + /* + * The EPT violation must have been caused by accessing a guest-physical + * address that is a translation of a guest-linear address. 
+ */ + if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 || + (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) { + return (UNHANDLED); + } + + handled = lapic_mmio(vm, cpu, gpa - DEFAULT_APIC_BASE, read, rip, cr3); + + return (handled); +} + +static int vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) { int handled; struct vmcs *vmcs; struct vmxctx *vmxctx; uint32_t eax, ecx, edx; - uint64_t qual; + uint64_t qual, gpa, cr3; handled = 0; vmcs = &vmx->vmcs[vcpu]; @@ -1229,11 +1269,17 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) vmexit->u.inout.eax = (uint32_t)(vmxctx->guest_rax); break; case EXIT_REASON_CPUID: - handled = vmx_handle_cpuid(vcpu, vmxctx); + handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx); break; case EXIT_REASON_EPT_FAULT: - vmexit->exitcode = VM_EXITCODE_PAGING; - vmexit->u.paging.cr3 = vmcs_guest_cr3(); + gpa = vmcs_gpa(); + cr3 = vmcs_guest_cr3(); + handled = vmx_lapic_fault(vmx->vm, vcpu, + gpa, vmexit->rip, cr3, qual); + if (!handled) { + vmexit->exitcode = VM_EXITCODE_PAGING; + vmexit->u.paging.cr3 = cr3; + } break; default: break; diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c index 9b7d3cb..aedc692 100644 --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -87,7 +87,7 @@ static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic"); #define VLAPIC_VERSION (16) #define VLAPIC_MAXLVT_ENTRIES (5) -#define x2apic(vlapic) ((vlapic)->msr_apicbase & APICBASE_X2APIC) +#define x2apic(vlapic) (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 
1 : 0) enum boot_state { BS_INIT, @@ -433,7 +433,10 @@ lapic_process_icr(struct vlapic *vlapic, uint64_t icrval) struct vlapic *vlapic2; struct vm_exit *vmexit; - dest = icrval >> 32; + if (x2apic(vlapic)) + dest = icrval >> 32; + else + dest = icrval >> (32 + 24); vec = icrval & APIC_VECTOR_MASK; mode = icrval & APIC_DELMODE_MASK; @@ -703,8 +706,18 @@ vlapic_op_mem_write(void* dev, uint64_t gpa, opsize_t size, uint64_t data) lapic->svr = data; break; case APIC_OFFSET_ICR_LOW: + if (!x2apic(vlapic)) { + data &= 0xffffffff; + data |= (uint64_t)lapic->icr_hi << 32; + } retval = lapic_process_icr(vlapic, data); break; + case APIC_OFFSET_ICR_HI: + if (!x2apic(vlapic)) { + retval = 0; + lapic->icr_hi = data; + } + break; case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: reg = vlapic_get_lvt(vlapic, offset); if (!(lapic->svr & APIC_SVR_ENABLE)) { @@ -810,19 +823,26 @@ static struct io_region vlapic_mmio[VM_MAXCPU]; struct vlapic * vlapic_init(struct vm *vm, int vcpuid) { + int err; + enum x2apic_state state; struct vlapic *vlapic; + err = vm_get_x2apic_state(vm, vcpuid, &state); + if (err) + panic("vlapic_init: err %d fetching x2apic state", err); + vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO); vlapic->vm = vm; vlapic->vcpuid = vcpuid; - vlapic->msr_apicbase = DEFAULT_APIC_BASE | - APICBASE_ENABLED | - APICBASE_X2APIC; + vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; if (vcpuid == 0) vlapic->msr_apicbase |= APICBASE_BSP; + if (state == X2APIC_ENABLED) + vlapic->msr_apicbase |= APICBASE_X2APIC; + vlapic->ops = &vlapic_dev_ops; vlapic->mmio = vlapic_mmio + vcpuid; @@ -856,6 +876,15 @@ vlapic_get_apicbase(struct vlapic *vlapic) void vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val) { + int err; + enum x2apic_state state; + + err = vm_get_x2apic_state(vlapic->vm, vlapic->vcpuid, &state); + if (err) + panic("vlapic_set_apicbase: err %d fetching x2apic state", err); + + if (state == X2APIC_DISABLED) + val &= 
~APICBASE_X2APIC; vlapic->msr_apicbase = val; } diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 29dbe67..764ffbb 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -160,11 +160,11 @@ vcpu_init(struct vm *vm, uint32_t vcpu_id) vcpu->hostcpu = -1; vcpu->vcpuid = vcpu_id; + vcpu->x2apic_state = X2APIC_ENABLED; vcpu->vlapic = vlapic_init(vm, vcpu_id); vcpu->guestfpu = fpu_save_area_alloc(); fpu_save_area_reset(vcpu->guestfpu); vcpu->stats = vmm_stat_alloc(); - vcpu->x2apic_state = X2APIC_ENABLED; } struct vm_exit * diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c new file mode 100644 index 0000000..fe01d69 --- /dev/null +++ b/sys/amd64/vmm/vmm_instruction_emul.c @@ -0,0 +1,385 @@ +/*- + * Copyright (c) 2012 Sandvine, Inc. + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/pcpu.h> +#include <sys/systm.h> + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <machine/pmap.h> +#include <machine/vmparam.h> +#include <machine/vmm.h> + +#include "vmm_instruction_emul.h" + +#define GB (1024 * 1024 * 1024) + +static enum vm_reg_name gpr_map[16] = { + VM_REG_GUEST_RAX, + VM_REG_GUEST_RCX, + VM_REG_GUEST_RDX, + VM_REG_GUEST_RBX, + VM_REG_GUEST_RSP, + VM_REG_GUEST_RBP, + VM_REG_GUEST_RSI, + VM_REG_GUEST_RDI, + VM_REG_GUEST_R8, + VM_REG_GUEST_R9, + VM_REG_GUEST_R10, + VM_REG_GUEST_R11, + VM_REG_GUEST_R12, + VM_REG_GUEST_R13, + VM_REG_GUEST_R14, + VM_REG_GUEST_R15 +}; + +static void +vie_init(struct vie *vie) +{ + + bzero(vie, sizeof(struct vie)); + + vie->op_size = VIE_OP_SIZE_32BIT; + + vie->base_register = VM_REG_LAST; + vie->index_register = VM_REG_LAST; + vie->operand_register = VM_REG_LAST; +} + +static int +gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys, + uint64_t *gpa, uint64_t *gpaend) +{ + vm_paddr_t hpa; + int nlevels, ptpshift, ptpindex; + uint64_t *ptpbase, pte, pgsize; + + /* + * XXX assumes 64-bit guest with 4 page walk levels + */ + nlevels = 4; + while (--nlevels >= 0) { + /* Zero out the lower 12 bits and the upper 12 bits */ + ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12; + + hpa = vm_gpa2hpa(vm, ptpphys, PAGE_SIZE); + if (hpa == -1) + goto error; + + ptpbase = (uint64_t 
*)PHYS_TO_DMAP(hpa); + + ptpshift = PAGE_SHIFT + nlevels * 9; + ptpindex = (gla >> ptpshift) & 0x1FF; + pgsize = 1UL << ptpshift; + + pte = ptpbase[ptpindex]; + + if ((pte & PG_V) == 0) + goto error; + + if (pte & PG_PS) { + if (pgsize > 1 * GB) + goto error; + else + break; + } + + ptpphys = pte; + } + + /* Zero out the lower 'ptpshift' bits and the upper 12 bits */ + pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12; + *gpa = pte | (gla & (pgsize - 1)); + *gpaend = pte + pgsize; + return (0); + +error: + return (-1); +} + +void +vmm_fetch_instruction(struct vm *vm, uint64_t rip, uint64_t cr3, + struct vie *vie) +{ + int n, err; + uint64_t hpa, gpa, gpaend; + + /* + * XXX cache previously fetched instructions using 'rip' as the tag + */ + + vie_init(vie); + + /* + * Copy up to 15 bytes of the instruction stream into 'vie' + */ + while (vie->num_valid < VIE_INST_SIZE) { + err = gla2gpa(vm, rip, cr3, &gpa, &gpaend); + if (err) + break; + + n = min(VIE_INST_SIZE - vie->num_valid, gpaend - gpa); + + hpa = vm_gpa2hpa(vm, gpa, n); + if (hpa == -1) + break; + + bcopy((void *)PHYS_TO_DMAP(hpa), &vie->inst[vie->num_valid], n); + + rip += n; + vie->num_valid += n; + } +} + +static int +vie_peek(struct vie *vie, uint8_t *x) +{ + if (vie->num_processed < vie->num_valid) { + *x = vie->inst[vie->num_processed]; + return (0); + } else + return (-1); +} + +static void +vie_advance(struct vie *vie) +{ + if (vie->num_processed >= vie->num_valid) + panic("vie_advance: %d/%d", vie->num_processed, vie->num_valid); + + vie->num_processed++; +} + +static int +decode_rex(struct vie *vie) +{ + uint8_t x; + + if (vie_peek(vie, &x)) + return (-1); + + if (x >= 0x40 && x <= 0x4F) { + vie->rex_w = x & 0x8 ? 1 : 0; + vie->rex_r = x & 0x4 ? 1 : 0; + vie->rex_x = x & 0x2 ? 1 : 0; + vie->rex_b = x & 0x1 ? 
1 : 0; + + vie_advance(vie); + } + + return (0); +} + +static int +decode_opcode(struct vie *vie) +{ + uint8_t x; + + static const uint8_t flags[256] = { + [0x89] = VIE_F_HAS_MODRM | VIE_F_FROM_REG | VIE_F_TO_RM, + [0x8B] = VIE_F_HAS_MODRM | VIE_F_FROM_RM | VIE_F_TO_REG, + [0xC7] = VIE_F_HAS_MODRM | VIE_F_FROM_IMM | VIE_F_TO_RM, + }; + + if (vie_peek(vie, &x)) + return (-1); + + vie->opcode_byte = x; + vie->opcode_flags = flags[x]; + + vie_advance(vie); + + if (vie->opcode_flags == 0) + return (-1); + else + return (0); +} + +/* + * XXX assuming 32-bit or 64-bit guest + */ +static int +decode_modrm(struct vie *vie) +{ + uint8_t x; + + if ((vie->opcode_flags & VIE_F_HAS_MODRM) == 0) + return (0); + + if (vie_peek(vie, &x)) + return (-1); + + vie->mod = (x >> 6) & 0x3; + vie->rm = (x >> 0) & 0x7; + vie->reg = (x >> 3) & 0x7; + + if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) || + (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) { + /* + * Table 2-5: Special Cases of REX Encodings + * + * mod=0, r/m=5 is used in the compatibility mode to + * indicate a disp32 without a base register. + * + * mod!=3, r/m=4 is used in the compatibility mode to + * indicate that the SIB byte is present. + * + * The 'b' bit in the REX prefix is don't care in + * this case. 
+ */ + } else { + vie->rm |= (vie->rex_b << 3); + } + + vie->reg |= (vie->rex_r << 3); + + /* SIB addressing not supported yet */ + if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB) + return (-1); + + vie->base_register = gpr_map[vie->rm]; + + if (vie->opcode_flags & (VIE_F_FROM_REG | VIE_F_TO_REG)) + vie->operand_register = gpr_map[vie->reg]; + + switch (vie->mod) { + case VIE_MOD_INDIRECT_DISP8: + vie->disp_bytes = 1; + break; + case VIE_MOD_INDIRECT_DISP32: + vie->disp_bytes = 4; + break; + case VIE_MOD_INDIRECT: + if (vie->rm == VIE_RM_DISP32) { + vie->disp_bytes = 4; + vie->base_register = VM_REG_LAST; /* no base */ + } + break; + } + + /* calculate the operand size */ + if (vie->rex_w) + vie->op_size = VIE_OP_SIZE_64BIT; + + if (vie->opcode_flags & VIE_F_FROM_IMM) + vie->imm_bytes = 4; + + vie_advance(vie); + + return (0); +} + +static int +decode_displacement(struct vie *vie) +{ + int n, i; + uint8_t x; + + union { + char buf[4]; + int8_t signed8; + int32_t signed32; + } u; + + if ((n = vie->disp_bytes) == 0) + return (0); + + if (n != 1 && n != 4) + panic("decode_displacement: invalid disp_bytes %d", n); + + for (i = 0; i < n; i++) { + if (vie_peek(vie, &x)) + return (-1); + + u.buf[i] = x; + vie_advance(vie); + } + + if (n == 1) + vie->displacement = u.signed8; /* sign-extended */ + else + vie->displacement = u.signed32; /* sign-extended */ + + return (0); +} + +static int +decode_immediate(struct vie *vie) +{ + int i, n; + uint8_t x; + union { + char buf[4]; + int32_t signed32; + } u; + + if ((n = vie->imm_bytes) == 0) + return (0); + + if (n != 4) + panic("decode_immediate: invalid imm_bytes %d", n); + + for (i = 0; i < n; i++) { + if (vie_peek(vie, &x)) + return (-1); + + u.buf[i] = x; + vie_advance(vie); + } + + vie->immediate = u.signed32; /* sign-extended */ + + return (0); +} + +int +vmm_decode_instruction(struct vie *vie) +{ + if (decode_rex(vie)) + return (-1); + + if (decode_opcode(vie)) + return (-1); + + if (decode_modrm(vie)) + return 
(-1); + + if (decode_displacement(vie)) + return (-1); + + if (decode_immediate(vie)) + return (-1); + + return (0); +} diff --git a/sys/amd64/vmm/vmm_instruction_emul.h b/sys/amd64/vmm/vmm_instruction_emul.h new file mode 100644 index 0000000..94937f2 --- /dev/null +++ b/sys/amd64/vmm/vmm_instruction_emul.h @@ -0,0 +1,91 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _VMM_INSTRUCTION_EMUL_H_ +#define _VMM_INSTRUCTION_EMUL_H_ + +enum vie_op_size { + VIE_OP_SIZE_32BIT, /* default */ + VIE_OP_SIZE_64BIT, + VIE_OP_SIZE_8BIT +}; + +#define VIE_INST_SIZE 15 +struct vie { + uint8_t inst[VIE_INST_SIZE]; + + uint8_t rex_w:1, + rex_r:1, + rex_x:1, + rex_b:1; + + uint8_t mod:2, + reg:4, + rm:4; + + + uint8_t opcode_byte; + uint16_t opcode_flags; + uint8_t disp_bytes; + uint8_t imm_bytes; + + int num_valid; + int num_processed; + + enum vm_reg_name base_register; + enum vm_reg_name index_register; + enum vm_reg_name operand_register; + + int op_size; + int64_t displacement; + int64_t immediate; +}; + +#define VIE_F_HAS_MODRM (1 << 0) +#define VIE_F_FROM_RM (1 << 1) +#define VIE_F_FROM_REG (1 << 2) +#define VIE_F_TO_RM (1 << 3) +#define VIE_F_TO_REG (1 << 4) +#define VIE_F_FROM_IMM (1 << 5) + +#define VIE_MOD_INDIRECT 0 +#define VIE_MOD_INDIRECT_DISP8 1 +#define VIE_MOD_INDIRECT_DISP32 2 +#define VIE_MOD_DIRECT 3 + +#define VIE_RM_SIB 4 +#define VIE_RM_DISP32 5 + +struct vm; + +void vmm_fetch_instruction(struct vm *vm, uint64_t rip, uint64_t cr3, + struct vie *vie); + +int vmm_decode_instruction(struct vie *vie); + +#endif diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c index 13550b4..0d797e6 100644 --- a/sys/amd64/vmm/vmm_lapic.c +++ b/sys/amd64/vmm/vmm_lapic.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include "vmm_ipi.h" #include "vmm_lapic.h" #include "vlapic.h" +#include "vmm_instruction_emul.h" static int lapic_write(struct vlapic *vlapic, u_int offset, uint64_t val) @@ -174,3 +175,73 @@ lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val) return (handled); } + +int +lapic_mmio(struct vm *vm, int cpu, u_int offset, int read, + uint64_t rip, uint64_t cr3) +{ + int handled, error; + uint64_t val; + struct vie vie; + struct vlapic *vlapic; + + const int UNHANDLED = 0; + + vlapic = vm_lapic(vm, cpu); + + vmm_fetch_instruction(vm, rip, cr3, &vie); + + if 
(vmm_decode_instruction(&vie) != 0) + return (UNHANDLED); + + /* Only 32-bit accesses to local apic */ + if (vie.op_size != VIE_OP_SIZE_32BIT) + return (UNHANDLED); + + /* + * XXX + * The operand register in which we store the result of the + * read must be a GPR that we can modify even if the vcpu + * is "running". All the GPRs qualify except for %rsp. + * + * This is a limitation of the vm_set_register() API + * and can be fixed if necessary. + */ + if (vie.operand_register == VM_REG_GUEST_RSP) + return (UNHANDLED); + + if (read) { + if ((vie.opcode_flags & VIE_F_TO_REG) == 0) + return (UNHANDLED); + + if (vie.operand_register >= VM_REG_LAST) + return (UNHANDLED); + + handled = lapic_read(vlapic, offset, &val); + if (handled) { + error = vm_set_register(vm, cpu, vie.operand_register, + val); + if (error) + panic("lapic_mmio: error %d setting gpr %d", + error, vie.operand_register); + } + } else { + if ((vie.opcode_flags & VIE_F_FROM_REG) && + (vie.operand_register < VM_REG_LAST)) { + error = vm_get_register(vm, cpu, vie.operand_register, + &val); + if (error) { + panic("lapic_mmio: error %d getting gpr %d", + error, vie.operand_register); + } + } else if (vie.opcode_flags & VIE_F_FROM_IMM) { + val = vie.immediate; + } else { + return (UNHANDLED); + } + + handled = lapic_write(vlapic, offset, val); + } + + return (handled); +} diff --git a/sys/amd64/vmm/vmm_lapic.h b/sys/amd64/vmm/vmm_lapic.h index 60f7696..7bba4e3 100644 --- a/sys/amd64/vmm/vmm_lapic.h +++ b/sys/amd64/vmm/vmm_lapic.h @@ -35,6 +35,9 @@ boolean_t lapic_msr(u_int num); int lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval); int lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval); +int lapic_mmio(struct vm *vm, int cpu, u_int offset, int read, + uint64_t rip, uint64_t cr3); + void lapic_timer_tick(struct vm *vm, int cpu); /* diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c index 669fa4b..47ba975 100644 --- a/sys/amd64/vmm/x86.c +++ b/sys/amd64/vmm/x86.c @@ -29,13 +29,17 @@ 
#include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <sys/param.h> #include <sys/types.h> #include <sys/systm.h> +#include <sys/cpuset.h> #include <machine/cpufunc.h> #include <machine/md_var.h> #include <machine/specialreg.h> +#include <machine/vmm.h> + #include "x86.h" #define CPUID_VM_HIGH 0x40000000 @@ -43,10 +47,12 @@ __FBSDID("$FreeBSD$"); static const char bhyve_id[12] = "BHyVE BHyVE "; int -x86_emulate_cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx, - uint32_t vcpu_id) +x86_emulate_cpuid(struct vm *vm, int vcpu_id, + uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { + int error; unsigned int func, regs[4]; + enum x2apic_state x2apic_state; func = *eax; @@ -91,6 +97,12 @@ x86_emulate_cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx, case CPUID_0000_0001: do_cpuid(1, regs); + error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state); + if (error) { + panic("x86_emulate_cpuid: error %d " + "fetching x2apic state", error); + } + /* * Override the APIC ID only in ebx */ @@ -102,7 +114,11 @@ x86_emulate_cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx, * Advertise x2APIC capability and Hypervisor guest. 
*/ regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2); - regs[2] |= CPUID2_X2APIC | CPUID2_HV; + + regs[2] |= CPUID2_HV; + + if (x2apic_state != X2APIC_DISABLED) + regs[2] |= CPUID2_X2APIC; /* * Hide xsave/osxsave/avx until the FPU save/restore diff --git a/sys/amd64/vmm/x86.h b/sys/amd64/vmm/x86.h index d672831..d19e1d8 100644 --- a/sys/amd64/vmm/x86.h +++ b/sys/amd64/vmm/x86.h @@ -57,7 +57,7 @@ */ #define CPUID_0000_0001_FEAT0_VMX (1<<5) -int x86_emulate_cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, - uint32_t *edx, uint32_t vcpu_id); +int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx); #endif diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile index eda6fb0..af93567 100644 --- a/sys/modules/vmm/Makefile +++ b/sys/modules/vmm/Makefile @@ -13,6 +13,7 @@ CFLAGS+= -I${.CURDIR}/../../amd64/vmm/intel .PATH: ${.CURDIR}/../../amd64/vmm SRCS+= vmm.c \ vmm_dev.c \ + vmm_instruction_emul.c \ vmm_ipi.c \ vmm_lapic.c \ vmm_mem.c \ |