summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- sys/amd64/include/vmm.h 3
-rw-r--r-- sys/amd64/include/vmm_instruction_emul.h 113
-rw-r--r-- sys/amd64/vmm/intel/vmcs.h 1
-rw-r--r-- sys/amd64/vmm/intel/vmx.c 45
-rw-r--r-- sys/amd64/vmm/vmm_instruction_emul.c 481
-rw-r--r-- sys/amd64/vmm/vmm_instruction_emul.h 91
-rw-r--r-- sys/amd64/vmm/vmm_lapic.c 83
-rw-r--r-- sys/amd64/vmm/vmm_lapic.h 6
-rw-r--r-- usr.sbin/bhyve/Makefile 5
-rw-r--r-- usr.sbin/bhyve/fbsdrun.c 4
-rw-r--r-- usr.sbin/bhyve/instruction_emul.c 641
-rw-r--r-- usr.sbin/bhyve/instruction_emul.h 36
-rw-r--r-- usr.sbin/bhyve/ioapic.c 1
-rw-r--r-- usr.sbin/bhyve/mem.c 51
-rw-r--r-- usr.sbin/bhyve/mem.h 2
-rw-r--r-- usr.sbin/bhyve/pci_passthru.c 1
16 files changed, 649 insertions, 915 deletions
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 8f78b8f..2fb2194 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -150,6 +150,8 @@ void vm_interrupt_hostcpu(struct vm *vm, int vcpu);
#endif /* KERNEL */
+#include <machine/vmm_instruction_emul.h>
+
#define VM_MAXCPU 8 /* maximum virtual cpus */
/*
@@ -268,6 +270,7 @@ struct vm_exit {
uint64_t cr3;
uint64_t gpa;
int rwx;
+ struct vie vie;
} paging;
/*
* VMX specific payload. Used when there is no "better"
diff --git a/sys/amd64/include/vmm_instruction_emul.h b/sys/amd64/include/vmm_instruction_emul.h
new file mode 100644
index 0000000..4cc494b
--- /dev/null
+++ b/sys/amd64/include/vmm_instruction_emul.h
@@ -0,0 +1,113 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_INSTRUCTION_EMUL_H_
+#define _VMM_INSTRUCTION_EMUL_H_
+
+/*
+ * The data structures 'vie' and 'vie_op' are meant to be opaque to the
+ * consumers of instruction decoding. The only reason their contents
+ * need to be exposed is that they are part of the 'vm_exit' structure.
+ */
+struct vie_op {
+ uint8_t op_byte; /* actual opcode byte */
+ uint8_t op_type; /* type of operation (e.g. MOV) */
+ uint16_t op_flags; /* VIE_OP_F_xyz bits, e.g. an immediate operand is present */
+};
+
+#define VIE_INST_SIZE 15 /* capacity of vie.inst[] in bytes */
+struct vie {
+ uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
+ uint8_t num_valid; /* size of the instruction */
+ uint8_t num_processed; /* index into 'inst' of the next byte to decode */
+
+ uint8_t rex_w:1, /* REX prefix */
+ rex_r:1, /* extends ModRM 'reg' (becomes bit 3) */
+ rex_x:1, /* extends SIB 'index' (becomes bit 3) */
+ rex_b:1; /* extends ModRM 'rm' or SIB 'base' (becomes bit 3) */
+
+ uint8_t mod:2, /* ModRM byte */
+ reg:4, /* 3 bits from ModRM, bit 3 from REX.R */
+ rm:4; /* 3 bits from ModRM, bit 3 from REX.B when applicable */
+
+ uint8_t ss:2, /* SIB byte */
+ index:4, /* 3 bits from SIB, bit 3 from REX.X */
+ base:4; /* 3 bits from SIB, bit 3 from REX.B */
+
+ uint8_t disp_bytes; /* displacement size in bytes; 0 if there is none */
+ uint8_t imm_bytes; /* immediate size in bytes; 0 if there is none */
+
+ uint8_t scale; /* 1 << ss when an index register is used, else 0 */
+ int base_register; /* VM_REG_GUEST_xyz, or VM_REG_LAST if unused */
+ int index_register; /* VM_REG_GUEST_xyz, or VM_REG_LAST if unused */
+
+ int64_t displacement; /* optional addr displacement */
+ int64_t immediate; /* optional immediate operand */
+
+ uint8_t decoded; /* set to 1 if successfully decoded */
+
+ struct vie_op op; /* opcode description */
+};
+
+/*
+ * Callback functions to read and write memory regions.
+ */
+typedef int (*mem_region_read_t)(void *vm, int cpuid, uint64_t gpa,
+ uint64_t *rval, int rsize, void *arg);
+
+typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa,
+ uint64_t wval, int wsize, void *arg);
+
+/*
+ * Emulate the decoded 'vie' instruction.
+ *
+ * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
+ * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ * Returns 0 on success and a non-zero errno value (e.g. EINVAL) on failure.
+ */
+int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t mrr, mem_region_write_t mrw,
+ void *mrarg);
+
+#ifdef _KERNEL
+/*
+ * APIs to fetch and decode the instruction from nested page fault handler.
+ */
+int vmm_fetch_instruction(struct vm *vm, int cpuid,
+ uint64_t rip, int inst_length, uint64_t cr3,
+ struct vie *vie);
+
+int vmm_decode_instruction(struct vm *vm, int cpuid,
+ uint64_t gla, struct vie *vie);
+#endif /* _KERNEL */
+
+#endif /* _VMM_INSTRUCTION_EMUL_H_ */
diff --git a/sys/amd64/vmm/intel/vmcs.h b/sys/amd64/vmm/intel/vmcs.h
index 84532f4..f39eed2 100644
--- a/sys/amd64/vmm/intel/vmcs.h
+++ b/sys/amd64/vmm/intel/vmcs.h
@@ -67,6 +67,7 @@ uint64_t vmcs_read(uint32_t encoding);
#define vmcs_exit_qualification() vmcs_read(VMCS_EXIT_QUALIFICATION)
#define vmcs_guest_cr3() vmcs_read(VMCS_GUEST_CR3)
#define vmcs_gpa() vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)
+#define vmcs_gla() vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)
#endif /* _KERNEL */
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 7a9cfb8..b185c57 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -63,7 +63,6 @@ __FBSDID("$FreeBSD$");
#include "vmx.h"
#include "x86.h"
#include "vmx_controls.h"
-#include "vmm_instruction_emul.h"
#define PINBASED_CTLS_ONE_SETTING \
(PINBASED_EXTINT_EXITING | \
@@ -1150,23 +1149,11 @@ vmx_emulate_cr_access(struct vmx *vmx, int vcpu, uint64_t exitqual)
}
static int
-vmx_lapic_fault(struct vm *vm, int cpu,
- uint64_t gpa, uint64_t rip, int inst_length,
- uint64_t cr3, uint64_t ept_qual)
+vmx_ept_fault(struct vm *vm, int cpu,
+ uint64_t gla, uint64_t gpa, uint64_t rip, int inst_length,
+ uint64_t cr3, uint64_t ept_qual, struct vie *vie)
{
- int read, write, handled;
- struct vie vie;
-
- /*
- * For this to be a legitimate access to the local apic:
- * - the GPA in the local apic page
- * - the GPA must be aligned on a 16 byte boundary
- */
- if (gpa < DEFAULT_APIC_BASE || gpa >= DEFAULT_APIC_BASE + PAGE_SIZE)
- return (UNHANDLED);
-
- if ((gpa & 0xF) != 0)
- return (UNHANDLED);
+ int read, write, error;
/* EPT violation on an instruction fetch doesn't make sense here */
if (ept_qual & EPT_VIOLATION_INST_FETCH)
@@ -1188,15 +1175,22 @@ vmx_lapic_fault(struct vm *vm, int cpu,
}
/* Fetch, decode and emulate the faulting instruction */
- if (vmm_fetch_instruction(vm, rip, inst_length, cr3, &vie) != 0)
+ if (vmm_fetch_instruction(vm, cpu, rip, inst_length, cr3, vie) != 0)
return (UNHANDLED);
- if (vmm_decode_instruction(&vie) != 0)
+ if (vmm_decode_instruction(vm, cpu, gla, vie) != 0)
return (UNHANDLED);
- handled = lapic_mmio(vm, cpu, gpa - DEFAULT_APIC_BASE, read, &vie);
+ /*
+ * Check if this is a local apic access
+ */
+ if (gpa < DEFAULT_APIC_BASE || gpa >= DEFAULT_APIC_BASE + PAGE_SIZE)
+ return (UNHANDLED);
- return (handled);
+ error = vmm_emulate_instruction(vm, cpu, gpa, vie,
+ lapic_mmio_read, lapic_mmio_write, 0);
+
+ return (error ? UNHANDLED : HANDLED);
}
static int
@@ -1206,7 +1200,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
struct vmcs *vmcs;
struct vmxctx *vmxctx;
uint32_t eax, ecx, edx;
- uint64_t qual, gpa, cr3, intr_info;
+ uint64_t qual, gla, gpa, cr3, intr_info;
handled = 0;
vmcs = &vmx->vmcs[vcpu];
@@ -1299,11 +1293,12 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx);
break;
case EXIT_REASON_EPT_FAULT:
+ gla = vmcs_gla();
gpa = vmcs_gpa();
cr3 = vmcs_guest_cr3();
- handled = vmx_lapic_fault(vmx->vm, vcpu,
- gpa, vmexit->rip, vmexit->inst_length,
- cr3, qual);
+ handled = vmx_ept_fault(vmx->vm, vcpu, gla, gpa,
+ vmexit->rip, vmexit->inst_length,
+ cr3, qual, &vmexit->u.paging.vie);
if (!handled) {
vmexit->exitcode = VM_EXITCODE_PAGING;
vmexit->u.paging.cr3 = cr3;
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 7ef4dbb..5e5399b 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -30,6 +30,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#ifdef _KERNEL
#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>
@@ -40,10 +41,60 @@ __FBSDID("$FreeBSD$");
#include <machine/pmap.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
+#else /* !_KERNEL */
+#include <sys/types.h>
+#include <sys/errno.h>
-#include "vmm_instruction_emul.h"
+#include <machine/vmm.h>
+
+#include <vmmapi.h>
+#endif /* _KERNEL */
+
+
+
+/* struct vie_op.op_type */
+enum {
+ VIE_OP_TYPE_NONE = 0, /* value of table entries that were never initialized */
+ VIE_OP_TYPE_MOV, /* handled by emulate_mov() */
+ VIE_OP_TYPE_AND, /* handled by emulate_and() */
+ VIE_OP_TYPE_LAST /* presumably an end-of-list sentinel; not used in the code shown */
+};
+
+/* struct vie_op.op_flags */
+#define VIE_OP_F_IMM (1 << 0) /* immediate operand present */
+#define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */
+
+static const struct vie_op one_byte_opcodes[256] = { /* indexed by opcode byte; unlisted entries are VIE_OP_TYPE_NONE */
+ [0x89] = { /* MOV r/m, reg */
+ .op_byte = 0x89,
+ .op_type = VIE_OP_TYPE_MOV,
+ },
+ [0x8B] = { /* MOV reg, r/m */
+ .op_byte = 0x8B,
+ .op_type = VIE_OP_TYPE_MOV,
+ },
+ [0xC7] = { /* MOV r/m, imm32 */
+ .op_byte = 0xC7,
+ .op_type = VIE_OP_TYPE_MOV,
+ .op_flags = VIE_OP_F_IMM,
+ },
+ [0x23] = { /* AND reg, r/m */
+ .op_byte = 0x23,
+ .op_type = VIE_OP_TYPE_AND,
+ }
+};
+
+/* struct vie.mod */
+#define VIE_MOD_INDIRECT 0
+#define VIE_MOD_INDIRECT_DISP8 1
+#define VIE_MOD_INDIRECT_DISP32 2
+#define VIE_MOD_DIRECT 3
-#define GB (1024 * 1024 * 1024)
+/* struct vie.rm */
+#define VIE_RM_SIB 4
+#define VIE_RM_DISP32 5
+
+#define GB (1024 * 1024 * 1024)
static enum vm_reg_name gpr_map[16] = {
VM_REG_GUEST_RAX,
@@ -64,17 +115,232 @@ static enum vm_reg_name gpr_map[16] = {
VM_REG_GUEST_R15
};
+static uint64_t size2mask[] = { /* indexed by operand size in bytes; other indices are 0 */
+ [1] = 0xff,
+ [2] = 0xffff,
+ [4] = 0xffffffff,
+ [8] = 0xffffffffffffffff,
+};
+
+static int /* returns 1 if 'reg' may be used as an emulation operand, 0 otherwise */
+vie_valid_register(enum vm_reg_name reg)
+{
+#ifdef _KERNEL
+ /*
+ * XXX
+ * The operand register in which we store the result of the
+ * read must be a GPR that we can modify even if the vcpu
+ * is "running". All the GPRs qualify except for %rsp.
+ *
+ * This is a limitation of the vm_set_register() API
+ * and can be fixed if necessary.
+ */
+ if (reg == VM_REG_GUEST_RSP)
+ return (0); /* %rsp is rejected, but only when built for the kernel */
+#endif
+ return (1); /* every other register (and any register in userspace) is accepted */
+}
+
+static int /* reads guest register 'reg' into '*rval'; returns 0 or an error */
+vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
+{
+ int error;
+
+ if (!vie_valid_register(reg)) /* the same restriction is applied to reads and writes */
+ return (EINVAL);
+
+ error = vm_get_register(vm, vcpuid, reg, rval);
+
+ return (error); /* result of vm_get_register() is passed through unchanged */
+}
+
+static int /* writes the low 'size' bytes of 'val' into guest register 'reg' */
+vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
+ uint64_t val, int size)
+{
+ int error;
+ uint64_t origval;
+
+ if (!vie_valid_register(reg))
+ return (EINVAL);
+
+ switch (size) {
+ case 1:
+ case 2: /* 8- and 16-bit writes keep the remaining bits of the register */
+ error = vie_read_register(vm, vcpuid, reg, &origval);
+ if (error)
+ return (error);
+ val &= size2mask[size]; /* keep only the bytes being written */
+ val |= origval & ~size2mask[size]; /* merge in the untouched upper bits */
+ break;
+ case 4: /* 32-bit writes clear the upper 32 bits of the register */
+ val &= 0xffffffffUL;
+ break;
+ case 8: /* 64-bit writes store 'val' as is */
+ break;
+ default: /* any other operand size is rejected */
+ return (EINVAL);
+ }
+
+ error = vm_set_register(vm, vcpuid, reg, val);
+ return (error);
+}
+
+/*
+ * The following simplifying assumptions are made during emulation:
+ *
+ * - guest is in 64-bit mode
+ * - default address size is 64-bits
+ * - default operand size is 32-bits
+ *
+ * - operand size override is not supported
+ *
+ * - address size override is not supported
+ */
+static int /* emulates the MOV forms 0x89, 0x8B and 0xC7 against the memory at 'gpa' */
+emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+ int error, size;
+ enum vm_reg_name reg;
+ uint64_t val;
+
+ size = 4; /* operand size in bytes; raised to 8 below when REX.W is set */
+ error = EINVAL; /* returned unchanged if the opcode byte is not handled */
+
+ switch (vie->op.op_byte) {
+ case 0x89:
+ /*
+ * MOV from reg (ModRM:reg) to mem (ModRM:r/m)
+ * 89/r: mov r/m32, r32
+ * REX.W + 89/r mov r/m64, r64
+ */
+ if (vie->rex_w)
+ size = 8;
+ reg = gpr_map[vie->reg];
+ error = vie_read_register(vm, vcpuid, reg, &val);
+ if (error == 0) {
+ val &= size2mask[size]; /* pass only the operand-sized part to the callback */
+ error = memwrite(vm, vcpuid, gpa, val, size, arg);
+ }
+ break;
+ case 0x8B:
+ /*
+ * MOV from mem (ModRM:r/m) to reg (ModRM:reg)
+ * 8B/r: mov r32, r/m32
+ * REX.W 8B/r: mov r64, r/m64
+ */
+ if (vie->rex_w)
+ size = 8;
+ error = memread(vm, vcpuid, gpa, &val, size, arg);
+ if (error == 0) {
+ reg = gpr_map[vie->reg];
+ error = vie_update_register(vm, vcpuid, reg, val, size); /* masks 'val' to 'size' bytes */
+ }
+ break;
+ case 0xC7:
+ /*
+ * MOV from imm32 to mem (ModRM:r/m)
+ * C7/0 mov r/m32, imm32
+ * REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits)
+ */
+ val = vie->immediate; /* already sign-extended */
+
+ if (vie->rex_w)
+ size = 8;
+
+ if (size != 8) /* a 32-bit store drops the sign-extension bits */
+ val &= size2mask[size];
+
+ error = memwrite(vm, vcpuid, gpa, val, size, arg);
+ break;
+ default:
+ break;
+ }
+
+ return (error);
+}
+
+static int /* emulates AND (opcode 0x23) of a register with the memory at 'gpa' */
+emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+ int error, size;
+ enum vm_reg_name reg;
+ uint64_t val1, val2;
+
+ size = 4; /* operand size in bytes; raised to 8 below when REX.W is set */
+ error = EINVAL; /* returned unchanged if the opcode byte is not handled */
+
+ switch (vie->op.op_byte) {
+ case 0x23:
+ /*
+ * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
+ * result in reg.
+ *
+ * 23/r and r32, r/m32
+ * REX.W + 23/r and r64, r/m64
+ */
+ if (vie->rex_w)
+ size = 8;
+
+ /* get the first operand */
+ reg = gpr_map[vie->reg];
+ error = vie_read_register(vm, vcpuid, reg, &val1);
+ if (error)
+ break;
+
+ /* get the second operand */
+ error = memread(vm, vcpuid, gpa, &val2, size, arg);
+ if (error)
+ break;
+
+ /* perform the operation and write the result */
+ val1 &= val2; /* NOTE(review): guest RFLAGS is not updated anywhere in this function */
+ error = vie_update_register(vm, vcpuid, reg, val1, size);
+ break;
+ default:
+ break;
+ }
+ return (error);
+}
+
+int /* dispatches a decoded instruction to its per-type emulation routine */
+vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite,
+ void *memarg)
+{
+ int error;
+
+ if (!vie->decoded) /* refuse anything that did not decode successfully */
+ return (EINVAL);
+
+ switch (vie->op.op_type) {
+ case VIE_OP_TYPE_MOV:
+ error = emulate_mov(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
+ case VIE_OP_TYPE_AND:
+ error = emulate_and(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
+ default: /* operation type with no emulation routine */
+ error = EINVAL;
+ break;
+ }
+
+ return (error); /* 0 on success, otherwise the error from the routine or EINVAL */
+}
+
+#ifdef _KERNEL
static void
vie_init(struct vie *vie)
{
bzero(vie, sizeof(struct vie));
- vie->op_size = VIE_OP_SIZE_32BIT;
-
vie->base_register = VM_REG_LAST;
vie->index_register = VM_REG_LAST;
- vie->operand_register = VM_REG_LAST;
}
static int
@@ -129,7 +395,7 @@ error:
}
int
-vmm_fetch_instruction(struct vm *vm, uint64_t rip, int inst_length,
+vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
uint64_t cr3, struct vie *vie)
{
int n, err;
@@ -172,6 +438,7 @@ vmm_fetch_instruction(struct vm *vm, uint64_t rip, int inst_length,
static int
vie_peek(struct vie *vie, uint8_t *x)
{
+
if (vie->num_processed < vie->num_valid) {
*x = vie->inst[vie->num_processed];
return (0);
@@ -182,8 +449,6 @@ vie_peek(struct vie *vie, uint8_t *x)
static void
vie_advance(struct vie *vie)
{
- if (vie->num_processed >= vie->num_valid)
- panic("vie_advance: %d/%d", vie->num_processed, vie->num_valid);
vie->num_processed++;
}
@@ -213,24 +478,16 @@ decode_opcode(struct vie *vie)
{
uint8_t x;
- static const uint8_t flags[256] = {
- [0x89] = VIE_F_HAS_MODRM | VIE_F_FROM_REG | VIE_F_TO_RM,
- [0x8B] = VIE_F_HAS_MODRM | VIE_F_FROM_RM | VIE_F_TO_REG,
- [0xC7] = VIE_F_HAS_MODRM | VIE_F_FROM_IMM | VIE_F_TO_RM,
- };
-
if (vie_peek(vie, &x))
return (-1);
- vie->opcode_byte = x;
- vie->opcode_flags = flags[x];
+ vie->op = one_byte_opcodes[x];
- vie_advance(vie);
-
- if (vie->opcode_flags == 0)
+ if (vie->op.op_type == VIE_OP_TYPE_NONE)
return (-1);
- else
- return (0);
+
+ vie_advance(vie);
+ return (0);
}
/*
@@ -241,9 +498,6 @@ decode_modrm(struct vie *vie)
{
uint8_t x;
- if ((vie->opcode_flags & VIE_F_HAS_MODRM) == 0)
- return (0);
-
if (vie_peek(vie, &x))
return (-1);
@@ -251,35 +505,40 @@ decode_modrm(struct vie *vie)
vie->rm = (x >> 0) & 0x7;
vie->reg = (x >> 3) & 0x7;
+ /*
+ * A direct addressing mode makes no sense in the context of an EPT
+ * fault. There has to be a memory access involved to cause the
+ * EPT fault.
+ */
+ if (vie->mod == VIE_MOD_DIRECT)
+ return (-1);
+
if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
(vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
- /*
- * Table 2-5: Special Cases of REX Encodings
- *
- * mod=0, r/m=5 is used in the compatibility mode to
- * indicate a disp32 without a base register.
- *
- * mod!=3, r/m=4 is used in the compatibility mode to
- * indicate that the SIB byte is present.
- *
- * The 'b' bit in the REX prefix is don't care in
- * this case.
- */
+ /*
+ * Table 2-5: Special Cases of REX Encodings
+ *
+ * mod=0, r/m=5 is used in the compatibility mode to
+ * indicate a disp32 without a base register.
+ *
+ * mod!=3, r/m=4 is used in the compatibility mode to
+ * indicate that the SIB byte is present.
+ *
+ * The 'b' bit in the REX prefix is don't care in
+ * this case.
+ */
} else {
vie->rm |= (vie->rex_b << 3);
}
vie->reg |= (vie->rex_r << 3);
- /* SIB addressing not supported yet */
+ /* SIB */
if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
- return (-1);
+ goto done;
vie->base_register = gpr_map[vie->rm];
- if (vie->opcode_flags & (VIE_F_FROM_REG | VIE_F_TO_REG))
- vie->operand_register = gpr_map[vie->reg];
-
switch (vie->mod) {
case VIE_MOD_INDIRECT_DISP8:
vie->disp_bytes = 1;
@@ -295,12 +554,76 @@ decode_modrm(struct vie *vie)
break;
}
- /* calculate the operand size */
- if (vie->rex_w)
- vie->op_size = VIE_OP_SIZE_64BIT;
-
- if (vie->opcode_flags & VIE_F_FROM_IMM)
+ /* Figure out immediate operand size (if any) */
+ if (vie->op.op_flags & VIE_OP_F_IMM)
vie->imm_bytes = 4;
+ else if (vie->op.op_flags & VIE_OP_F_IMM8)
+ vie->imm_bytes = 1;
+
+done:
+ vie_advance(vie);
+
+ return (0);
+}
+
+static int
+decode_sib(struct vie *vie)
+{
+ uint8_t x;
+
+ /* Proceed only if SIB byte is present */
+ if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
+ return (0);
+
+ if (vie_peek(vie, &x))
+ return (-1);
+
+ /* De-construct the SIB byte */
+ vie->ss = (x >> 6) & 0x3;
+ vie->index = (x >> 3) & 0x7;
+ vie->base = (x >> 0) & 0x7;
+
+ /* Apply the REX prefix modifiers */
+ vie->index |= vie->rex_x << 3;
+ vie->base |= vie->rex_b << 3;
+
+ switch (vie->mod) {
+ case VIE_MOD_INDIRECT_DISP8:
+ vie->disp_bytes = 1;
+ break;
+ case VIE_MOD_INDIRECT_DISP32:
+ vie->disp_bytes = 4;
+ break;
+ }
+
+ if (vie->mod == VIE_MOD_INDIRECT &&
+ (vie->base == 5 || vie->base == 13)) {
+ /*
+ * Special case when base register is unused if mod = 0
+ * and base = %rbp or %r13.
+ *
+ * Documented in:
+ * Table 2-3: 32-bit Addressing Forms with the SIB Byte
+ * Table 2-5: Special Cases of REX Encodings
+ */
+ vie->disp_bytes = 4;
+ } else {
+ vie->base_register = gpr_map[vie->base];
+ }
+
+ /*
+ * All encodings of 'index' are valid except for %rsp (4).
+ *
+ * Documented in:
+ * Table 2-3: 32-bit Addressing Forms with the SIB Byte
+ * Table 2-5: Special Cases of REX Encodings
+ */
+ if (vie->index != 4)
+ vie->index_register = gpr_map[vie->index];
+
+ /* 'scale' makes sense only in the context of an index register */
+ if (vie->index_register < VM_REG_LAST)
+ vie->scale = 1 << vie->ss;
vie_advance(vie);
@@ -348,13 +671,14 @@ decode_immediate(struct vie *vie)
uint8_t x;
union {
char buf[4];
+ int8_t signed8;
int32_t signed32;
} u;
if ((n = vie->imm_bytes) == 0)
return (0);
- if (n != 4)
+ if (n != 1 && n != 4)
panic("decode_immediate: invalid imm_bytes %d", n);
for (i = 0; i < n; i++) {
@@ -365,14 +689,62 @@ decode_immediate(struct vie *vie)
vie_advance(vie);
}
- vie->immediate = u.signed32; /* sign-extended */
+ if (n == 1)
+ vie->immediate = u.signed8; /* sign-extended */
+ else
+ vie->immediate = u.signed32; /* sign-extended */
return (0);
}
+#define VERIFY_GLA
+/*
+ * Verify that the 'guest linear address' provided as collateral of the nested
+ * page table fault matches the address computed from our instruction decoding.
+ */
+#ifdef VERIFY_GLA
+static int /* returns 0 if the decoded operands reproduce 'gla', -1 otherwise */
+verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
+{
+ int error;
+ uint64_t base, idx;
+
+ base = 0; /* contributes nothing when no base register was decoded */
+ if (vie->base_register != VM_REG_LAST) {
+ error = vm_get_register(vm, cpuid, vie->base_register, &base);
+ if (error) {
+ printf("verify_gla: error %d getting base reg %d\n",
+ error, vie->base_register);
+ return (-1);
+ }
+ }
+
+ idx = 0; /* contributes nothing when no index register was decoded */
+ if (vie->index_register != VM_REG_LAST) {
+ error = vm_get_register(vm, cpuid, vie->index_register, &idx);
+ if (error) {
+ printf("verify_gla: error %d getting index reg %d\n",
+ error, vie->index_register);
+ return (-1);
+ }
+ }
+
+ if (base + vie->scale * idx + vie->displacement != gla) { /* NOTE(review): no segment base is added here */
+ printf("verify_gla mismatch: "
+ "base(0x%0lx), scale(%d), index(0x%0lx), "
+ "disp(0x%0lx), gla(0x%0lx)\n",
+ base, vie->scale, idx, vie->displacement, gla);
+ return (-1);
+ }
+
+ return (0);
+}
+#endif /* VERIFY_GLA */
+
int
-vmm_decode_instruction(struct vie *vie)
+vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
{
+
if (decode_rex(vie))
return (-1);
@@ -382,11 +754,22 @@ vmm_decode_instruction(struct vie *vie)
if (decode_modrm(vie))
return (-1);
+ if (decode_sib(vie))
+ return (-1);
+
if (decode_displacement(vie))
return (-1);
if (decode_immediate(vie))
return (-1);
+#ifdef VERIFY_GLA
+ if (verify_gla(vm, cpuid, gla, vie))
+ return (-1);
+#endif
+
+ vie->decoded = 1; /* success */
+
return (0);
}
+#endif /* _KERNEL */
diff --git a/sys/amd64/vmm/vmm_instruction_emul.h b/sys/amd64/vmm/vmm_instruction_emul.h
deleted file mode 100644
index 1fa9e2b..0000000
--- a/sys/amd64/vmm/vmm_instruction_emul.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*-
- * Copyright (c) 2012 NetApp, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _VMM_INSTRUCTION_EMUL_H_
-#define _VMM_INSTRUCTION_EMUL_H_
-
-enum vie_op_size {
- VIE_OP_SIZE_32BIT, /* default */
- VIE_OP_SIZE_64BIT,
- VIE_OP_SIZE_8BIT
-};
-
-#define VIE_INST_SIZE 15
-struct vie {
- uint8_t inst[VIE_INST_SIZE];
-
- uint8_t rex_w:1,
- rex_r:1,
- rex_x:1,
- rex_b:1;
-
- uint8_t mod:2,
- reg:4,
- rm:4;
-
-
- uint8_t opcode_byte;
- uint16_t opcode_flags;
- uint8_t disp_bytes;
- uint8_t imm_bytes;
-
- int num_valid;
- int num_processed;
-
- enum vm_reg_name base_register;
- enum vm_reg_name index_register;
- enum vm_reg_name operand_register;
-
- int op_size;
- int64_t displacement;
- int64_t immediate;
-};
-
-#define VIE_F_HAS_MODRM (1 << 0)
-#define VIE_F_FROM_RM (1 << 1)
-#define VIE_F_FROM_REG (1 << 2)
-#define VIE_F_TO_RM (1 << 3)
-#define VIE_F_TO_REG (1 << 4)
-#define VIE_F_FROM_IMM (1 << 5)
-
-#define VIE_MOD_INDIRECT 0
-#define VIE_MOD_INDIRECT_DISP8 1
-#define VIE_MOD_INDIRECT_DISP32 2
-#define VIE_MOD_DIRECT 3
-
-#define VIE_RM_SIB 4
-#define VIE_RM_DISP32 5
-
-struct vm;
-
-int vmm_fetch_instruction(struct vm *vm, uint64_t rip, int inst_length,
- uint64_t cr3, struct vie *vie);
-
-int vmm_decode_instruction(struct vie *vie);
-
-#endif
diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c
index bb22122..dabcf06 100644
--- a/sys/amd64/vmm/vmm_lapic.c
+++ b/sys/amd64/vmm/vmm_lapic.c
@@ -34,12 +34,12 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <x86/specialreg.h>
+#include <x86/apicreg.h>
#include <machine/vmm.h>
#include "vmm_ipi.h"
#include "vmm_lapic.h"
#include "vlapic.h"
-#include "vmm_instruction_emul.h"
static int
lapic_write(struct vlapic *vlapic, u_int offset, uint64_t val)
@@ -177,64 +177,45 @@ lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val)
}
int
-lapic_mmio(struct vm *vm, int cpu, u_int offset, int read, struct vie *vie)
+lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size,
+ void *arg)
{
- int handled, error;
- uint64_t val;
+ int error;
+ uint64_t off;
struct vlapic *vlapic;
- const int UNHANDLED = 0;
+ off = gpa - DEFAULT_APIC_BASE;
+
+ /*
+ * Memory mapped local apic accesses must be 4 bytes wide and
+ * aligned on a 16-byte boundary.
+ */
+ if (size != 4 || off & 0xf)
+ return (EINVAL);
vlapic = vm_lapic(vm, cpu);
+ error = vlapic_op_mem_write(vlapic, off, DWORD, wval);
+ return (error);
+}
+
+int
+lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size,
+ void *arg)
+{
+ int error;
+ uint64_t off;
+ struct vlapic *vlapic;
- /* Only 32-bit accesses to local apic */
- if (vie->op_size != VIE_OP_SIZE_32BIT)
- return (UNHANDLED);
+ off = gpa - DEFAULT_APIC_BASE;
/*
- * XXX
- * The operand register in which we store the result of the
- * read must be a GPR that we can modify even if the vcpu
- * is "running". All the GPRs qualify except for %rsp.
- *
- * This is a limitation of the vm_set_register() API
- * and can be fixed if necessary.
+ * Memory mapped local apic accesses must be 4 bytes wide and
+ * aligned on a 16-byte boundary.
*/
- if (vie->operand_register == VM_REG_GUEST_RSP)
- return (UNHANDLED);
-
- if (read) {
- if ((vie->opcode_flags & VIE_F_TO_REG) == 0)
- return (UNHANDLED);
-
- if (vie->operand_register >= VM_REG_LAST)
- return (UNHANDLED);
-
- handled = lapic_read(vlapic, offset, &val);
- if (handled) {
- error = vm_set_register(vm, cpu, vie->operand_register,
- val);
- if (error)
- panic("lapic_mmio: error %d setting gpr %d",
- error, vie->operand_register);
- }
- } else {
- if ((vie->opcode_flags & VIE_F_FROM_REG) &&
- (vie->operand_register < VM_REG_LAST)) {
- error = vm_get_register(vm, cpu, vie->operand_register,
- &val);
- if (error) {
- panic("lapic_mmio: error %d getting gpr %d",
- error, vie->operand_register);
- }
- } else if (vie->opcode_flags & VIE_F_FROM_IMM) {
- val = vie->immediate;
- } else {
- return (UNHANDLED);
- }
-
- handled = lapic_write(vlapic, offset, val);
- }
+ if (size != 4 || off & 0xf)
+ return (EINVAL);
- return (handled);
+ vlapic = vm_lapic(vm, cpu);
+ error = vlapic_op_mem_read(vlapic, off, DWORD, rval);
+ return (error);
}
diff --git a/sys/amd64/vmm/vmm_lapic.h b/sys/amd64/vmm/vmm_lapic.h
index 59fc016..a79912e 100644
--- a/sys/amd64/vmm/vmm_lapic.h
+++ b/sys/amd64/vmm/vmm_lapic.h
@@ -30,13 +30,15 @@
#define _VMM_LAPIC_H_
struct vm;
-struct vie;
boolean_t lapic_msr(u_int num);
int lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval);
int lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval);
-int lapic_mmio(struct vm *vm, int cpu, u_int offset, int rd, struct vie *);
+int lapic_mmio_read(void *vm, int cpu, uint64_t gpa,
+ uint64_t *rval, int size, void *arg);
+int lapic_mmio_write(void *vm, int cpu, uint64_t gpa,
+ uint64_t wval, int size, void *arg);
int lapic_timer_tick(struct vm *vm, int cpu);
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
index 9dc7a53..c45b904 100644
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -7,11 +7,14 @@ PROG= bhyve
DEBUG_FLAGS= -g -O0
SRCS= acpi.c atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c
-SRCS+= instruction_emul.c ioapic.c mem.c mevent.c mptbl.c
+SRCS+= ioapic.c mem.c mevent.c mptbl.c
SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c
SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c pmtmr.c post.c rtc.c uart.c
SRCS+= xmsr.c spinup_ap.c
+.PATH: ${.CURDIR}/../../sys/amd64/vmm
+SRCS+= vmm_instruction_emul.c
+
NO_MAN=
DPADD= ${LIBVMMAPI} ${LIBMD} ${LIBPTHREAD}
diff --git a/usr.sbin/bhyve/fbsdrun.c b/usr.sbin/bhyve/fbsdrun.c
index 43fa797..b1c7098 100644
--- a/usr.sbin/bhyve/fbsdrun.c
+++ b/usr.sbin/bhyve/fbsdrun.c
@@ -57,7 +57,6 @@ __FBSDID("$FreeBSD$");
#include "mptbl.h"
#include "pci_emul.h"
#include "xmsr.h"
-#include "instruction_emul.h"
#include "ioapic.h"
#include "spinup_ap.h"
@@ -455,7 +454,8 @@ vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
stats.vmexit_paging++;
err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, vmexit->rip,
- vmexit->u.paging.cr3, vmexit->u.paging.rwx);
+ vmexit->u.paging.cr3, vmexit->u.paging.rwx,
+ &vmexit->u.paging.vie);
if (err) {
if (err == EINVAL) {
diff --git a/usr.sbin/bhyve/instruction_emul.c b/usr.sbin/bhyve/instruction_emul.c
deleted file mode 100644
index 78c3608..0000000
--- a/usr.sbin/bhyve/instruction_emul.c
+++ /dev/null
@@ -1,641 +0,0 @@
-/*-
- * Copyright (c) 2012 Sandvine, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#include <strings.h>
-#include <unistd.h>
-#include <assert.h>
-#include <machine/vmm.h>
-#include <vmmapi.h>
-
-#include "fbsdrun.h"
-#include "mem.h"
-#include "instruction_emul.h"
-
-#define PREFIX_LOCK 0xF0
-#define PREFIX_REPNE 0xF2
-#define PREFIX_REPE 0xF3
-#define PREFIX_CS_OVERRIDE 0x2E
-#define PREFIX_SS_OVERRIDE 0x36
-#define PREFIX_DS_OVERRIDE 0x3E
-#define PREFIX_ES_OVERRIDE 0x26
-#define PREFIX_FS_OVERRIDE 0x64
-#define PREFIX_GS_OVERRIDE 0x65
-#define PREFIX_BRANCH_NOT_TAKEN 0x2E
-#define PREFIX_BRANCH_TAKEN 0x3E
-#define PREFIX_OPSIZE 0x66
-#define is_opsz_prefix(x) ((x) == PREFIX_OPSIZE)
-#define PREFIX_ADDRSIZE 0x67
-
-#define OPCODE_2BYTE_ESCAPE 0x0F
-#define OPCODE_3BYTE_ESCAPE 0x38
-
-#define MODRM_MOD_MASK 0xC0
-#define MODRM_MOD_SHIFT 6
-#define MODRM_RM_MASK 0x07
-#define MODRM_RM_SHIFT 0
-#define MODRM_REG_MASK 0x38
-#define MODRM_REG_SHIFT 3
-
-#define MOD_INDIRECT 0x0
-#define MOD_INDIRECT_DISP8 0x1
-#define MOD_INDIRECT_DISP32 0x2
-#define MOD_DIRECT 0x3
-
-#define RM_EAX 0x0
-#define RM_ECX 0x1
-#define RM_EDX 0x2
-#define RM_EBX 0x3
-#define RM_SIB 0x4
-#define RM_DISP32 0x5
-#define RM_EBP RM_DISP32
-#define RM_ESI 0x6
-#define RM_EDI 0x7
-
-#define REG_EAX 0x0
-#define REG_ECX 0x1
-#define REG_EDX 0x2
-#define REG_EBX 0x3
-#define REG_ESP 0x4
-#define REG_EBP 0x5
-#define REG_ESI 0x6
-#define REG_EDI 0x7
-#define REG_R8 0x8
-#define REG_R9 0x9
-#define REG_R10 0xA
-#define REG_R11 0xB
-#define REG_R12 0xC
-#define REG_R13 0xD
-#define REG_R14 0xE
-#define REG_R15 0xF
-
-#define HAS_MODRM 1
-#define FROM_RM (1<<1)
-#define FROM_REG (1<<2)
-#define TO_RM (1<<3)
-#define TO_REG (1<<4)
-#define ZEXT (1<<5)
-#define FROM_8 (1<<6)
-#define FROM_16 (1<<7)
-#define TO_8 (1<<8)
-#define TO_16 (1<<9)
-
-#define REX_MASK 0xF0
-#define REX_PREFIX 0x40
-#define is_rex_prefix(x) ( ((x) & REX_MASK) == REX_PREFIX )
-#define REX_W_MASK 0x8
-#define REX_R_MASK 0x4
-#define REX_X_MASK 0x2
-#define REX_B_MASK 0x1
-
-#define is_prefix(x) ((x) == PREFIX_LOCK || (x) == PREFIX_REPNE || \
- (x) == PREFIX_REPE || (x) == PREFIX_CS_OVERRIDE || \
- (x) == PREFIX_SS_OVERRIDE || (x) == PREFIX_DS_OVERRIDE || \
- (x) == PREFIX_ES_OVERRIDE || (x) == PREFIX_FS_OVERRIDE || \
- (x) == PREFIX_GS_OVERRIDE || (x) == PREFIX_BRANCH_NOT_TAKEN || \
- (x) == PREFIX_BRANCH_TAKEN || (x) == PREFIX_OPSIZE || \
- (x) == PREFIX_ADDRSIZE || is_rex_prefix((x)))
-
-#define PAGE_FRAME_MASK 0x80
-#define PAGE_OFFSET_MASK 0xFFF
-#define PAGE_TABLE_ENTRY_MASK (~PAGE_OFFSET_MASK)
-#define PML4E_OFFSET_MASK 0x0000FF8000000000
-#define PML4E_SHIFT 39
-
-#define INSTR_VERIFY
-
-struct decoded_instruction
-{
- void *instruction;
- uint8_t *opcode;
- uint8_t *modrm;
- uint8_t *sib;
- uint8_t *displacement;
- uint8_t *immediate;
-
- uint16_t opcode_flags;
-
- uint8_t addressing_mode;
- uint8_t rm;
- uint8_t reg;
- uint8_t opsz;
- uint8_t rex_r;
- uint8_t rex_w;
- uint8_t rex_b;
- uint8_t rex_x;
-
- int32_t disp;
-};
-
-static enum vm_reg_name vm_reg_name_mappings[] = {
- [REG_EAX] = VM_REG_GUEST_RAX,
- [REG_EBX] = VM_REG_GUEST_RBX,
- [REG_ECX] = VM_REG_GUEST_RCX,
- [REG_EDX] = VM_REG_GUEST_RDX,
- [REG_ESP] = VM_REG_GUEST_RSP,
- [REG_EBP] = VM_REG_GUEST_RBP,
- [REG_ESI] = VM_REG_GUEST_RSI,
- [REG_EDI] = VM_REG_GUEST_RDI,
- [REG_R8] = VM_REG_GUEST_R8,
- [REG_R9] = VM_REG_GUEST_R9,
- [REG_R10] = VM_REG_GUEST_R10,
- [REG_R11] = VM_REG_GUEST_R11,
- [REG_R12] = VM_REG_GUEST_R12,
- [REG_R13] = VM_REG_GUEST_R13,
- [REG_R14] = VM_REG_GUEST_R14,
- [REG_R15] = VM_REG_GUEST_R15
-};
-
-uint16_t one_byte_opcodes[256] = {
- [0x88] = HAS_MODRM | FROM_REG | TO_RM | TO_8 | FROM_8,
- [0x89] = HAS_MODRM | FROM_REG | TO_RM,
- [0x8B] = HAS_MODRM | FROM_RM | TO_REG,
-};
-
-uint16_t two_byte_opcodes[256] = {
- [0xB6] = HAS_MODRM | FROM_RM | TO_REG | ZEXT | FROM_8,
- [0xB7] = HAS_MODRM | FROM_RM | TO_REG | ZEXT | FROM_16,
-};
-
-static uintptr_t
-gla2gpa(uint64_t gla, uint64_t guest_cr3)
-{
- uint64_t *table;
- uint64_t mask, entry;
- int level, shift;
- uintptr_t page_frame;
-
- table = paddr_guest2host(guest_cr3 & PAGE_TABLE_ENTRY_MASK);
- mask = PML4E_OFFSET_MASK;
- shift = PML4E_SHIFT;
- for (level = 0; level < 4; ++level)
- {
- entry = table[(gla & mask) >> shift];
- table = (uint64_t*)(entry & PAGE_TABLE_ENTRY_MASK);
-
- /* This entry does not point to another page table */
- if (entry & PAGE_FRAME_MASK || level >= 3)
- break;
-
- table = paddr_guest2host((uintptr_t)table);
- mask >>= 9;
- shift -= 9;
- }
-
- mask = (1 << shift) - 1;
- page_frame = ((uintptr_t)table & ~mask);
- return (page_frame | (gla & mask));
-}
-
-static void *
-gla2hla(uint64_t gla, uint64_t guest_cr3)
-{
- uintptr_t gpa;
-
- gpa = gla2gpa(gla, guest_cr3);
-
- return (paddr_guest2host(gpa));
-}
-
-/*
- * Decodes all of the prefixes of the instruction. Only a subset of REX
- * prefixes are currently supported. If any unsupported prefix is
- * encountered, returns -1.
- */
-static int
-decode_prefixes(struct decoded_instruction *decoded)
-{
- uint8_t *current_prefix;
-
- current_prefix = decoded->instruction;
-
- if (is_rex_prefix(*current_prefix)) {
- decoded->rex_w = *current_prefix & REX_W_MASK;
- decoded->rex_r = *current_prefix & REX_R_MASK;
- decoded->rex_x = *current_prefix & REX_X_MASK;
- decoded->rex_b = *current_prefix & REX_B_MASK;
- current_prefix++;
- } else if (is_opsz_prefix(*current_prefix)) {
- decoded->opsz = 1;
- current_prefix++;
- } else if (is_prefix(*current_prefix)) {
- return (-1);
- }
-
- decoded->opcode = current_prefix;
- return (0);
-}
-
-/*
- * Decodes the instruction's opcode. If the opcode is not understood, returns
- * -1 indicating an error. Sets the instruction's mod_rm pointer to the
- * location of the ModR/M field.
- */
-static int
-decode_opcode(struct decoded_instruction *decoded)
-{
- uint8_t opcode;
- uint16_t flags;
- int extra;
-
- opcode = *decoded->opcode;
- extra = 0;
-
- if (opcode != 0xf)
- flags = one_byte_opcodes[opcode];
- else {
- opcode = *(decoded->opcode + 1);
- flags = two_byte_opcodes[opcode];
- extra = 1;
- }
-
- if (!flags)
- return (-1);
-
- if (flags & HAS_MODRM) {
- decoded->modrm = decoded->opcode + 1 + extra;
- }
-
- decoded->opcode_flags = flags;
-
- return (0);
-}
-
-/*
- * Decodes the instruction's ModR/M field. Sets the instruction's sib pointer
- * to the location of the SIB if one is expected to be present, or 0 if not.
- */
-static int
-decode_mod_rm(struct decoded_instruction *decoded)
-{
- uint8_t modrm;
- uint8_t *extension_operands;
-
- if (decoded->modrm) {
- modrm = *decoded->modrm;
-
- decoded->addressing_mode = (modrm & MODRM_MOD_MASK) >> MODRM_MOD_SHIFT;
- decoded->rm = (modrm & MODRM_RM_MASK) >> MODRM_RM_SHIFT;
- decoded->reg = (modrm & MODRM_REG_MASK) >> MODRM_REG_SHIFT;
-
- if (decoded->rex_b)
- decoded->rm |= (1<<3);
-
- if (decoded->rex_r)
- decoded->reg |= (1<<3);
-
- extension_operands = decoded->modrm + 1;
-
- if (decoded->rm == RM_SIB) {
- decoded->sib = decoded->modrm + 1;
- extension_operands = decoded->sib + 1;
- }
-
- switch (decoded->addressing_mode) {
- case MOD_INDIRECT:
- case MOD_DIRECT:
- decoded->displacement = 0;
- break;
- case MOD_INDIRECT_DISP8:
- decoded->displacement = extension_operands;
- break;
- case MOD_INDIRECT_DISP32:
- decoded->displacement = extension_operands;
- break;
- }
- }
-
- return (0);
-}
-
-/*
- * Decodes the instruction's SIB field. No such instructions are currently
- * supported, so do nothing and return -1 if there is a SIB field, 0 otherwise.
- */
-static int
-decode_sib(struct decoded_instruction *decoded)
-{
-
- if (decoded->sib)
- return (-1);
-
- return (0);
-}
-
-/*
- * Grabs and saves the instruction's immediate operand and displacement if
- * they are present. Immediates are not currently supported, so if an
- * immediate is present it will return -1 indicating an error.
- */
-static int
-decode_extension_operands(struct decoded_instruction *decoded)
-{
-
- if (decoded->displacement) {
- if (decoded->addressing_mode == MOD_INDIRECT_DISP8) {
- decoded->disp = *((int8_t *)decoded->displacement);
- } else if (decoded->addressing_mode == MOD_INDIRECT_DISP32) {
- decoded->disp = *((int32_t *)decoded->displacement);
- }
- }
-
- if (decoded->immediate) {
- return (-1);
- }
-
- return (0);
-}
-
-static int
-decode_instruction(void *instr, struct decoded_instruction *decoded)
-{
- int error;
-
- bzero(decoded, sizeof(*decoded));
- decoded->instruction = instr;
-
- error = decode_prefixes(decoded);
- if (error)
- return (error);
-
- error = decode_opcode(decoded);
- if (error)
- return (error);
-
- error = decode_mod_rm(decoded);
- if (error)
- return (error);
-
- error = decode_sib(decoded);
- if (error)
- return (error);
-
- error = decode_extension_operands(decoded);
- if (error)
- return (error);
-
- return (0);
-}
-
-static enum vm_reg_name
-get_vm_reg_name(uint8_t reg)
-{
-
- return (vm_reg_name_mappings[reg]);
-}
-
-static uint64_t
-adjust_operand(const struct decoded_instruction *instruction, uint64_t val,
- int size)
-{
- uint64_t ret;
-
- if (instruction->opcode_flags & ZEXT) {
- switch (size) {
- case 1:
- ret = val & 0xff;
- break;
- case 2:
- ret = val & 0xffff;
- break;
- case 4:
- ret = val & 0xffffffff;
- break;
- case 8:
- ret = val;
- break;
- default:
- break;
- }
- } else {
- /*
- * Extend the sign
- */
- switch (size) {
- case 1:
- ret = (int8_t)(val & 0xff);
- break;
- case 2:
- ret = (int16_t)(val & 0xffff);
- break;
- case 4:
- ret = (int32_t)(val & 0xffffffff);
- break;
- case 8:
- ret = val;
- break;
- default:
- break;
- }
- }
-
- return (ret);
-}
-
-static int
-get_operand(struct vmctx *vm, int vcpu, uint64_t gpa, uint64_t guest_cr3,
- const struct decoded_instruction *instruction, uint64_t *operand,
- struct mem_range *mr)
-{
- enum vm_reg_name regname;
- uint64_t reg;
- int error;
- uint8_t rm, addressing_mode, size;
-
- if (instruction->opcode_flags & FROM_RM) {
- rm = instruction->rm;
- addressing_mode = instruction->addressing_mode;
- } else if (instruction->opcode_flags & FROM_REG) {
- rm = instruction->reg;
- addressing_mode = MOD_DIRECT;
- } else
- return (-1);
-
- /*
- * Determine size of operand
- */
- size = 4;
- if (instruction->opcode_flags & FROM_8) {
- size = 1;
- } else if (instruction->opcode_flags & FROM_16 ||
- instruction->opsz) {
- size = 2;
- }
-
- regname = get_vm_reg_name(rm);
- error = vm_get_register(vm, vcpu, regname, &reg);
- if (error)
- return (error);
-
- switch (addressing_mode) {
- case MOD_DIRECT:
- *operand = reg;
- error = 0;
- break;
- case MOD_INDIRECT:
- case MOD_INDIRECT_DISP8:
- case MOD_INDIRECT_DISP32:
-#ifdef INSTR_VERIFY
- {
- uintptr_t target;
-
- target = gla2gpa(reg, guest_cr3);
- target += instruction->disp;
- assert(gpa == target);
- }
-#endif
- error = (*mr->handler)(vm, vcpu, MEM_F_READ, gpa, size,
- operand, mr->arg1, mr->arg2);
- break;
- default:
- return (-1);
- }
-
- if (!error)
- *operand = adjust_operand(instruction, *operand, size);
-
- return (error);
-}
-
-static uint64_t
-adjust_write(uint64_t reg, uint64_t operand, int size)
-{
- uint64_t val;
-
- switch (size) {
- case 1:
- val = (reg & ~0xff) | (operand & 0xff);
- break;
- case 2:
- val = (reg & ~0xffff) | (operand & 0xffff);
- break;
- case 4:
- val = (reg & ~0xffffffff) | (operand & 0xffffffff);
- break;
- case 8:
- val = operand;
- default:
- break;
- }
-
- return (val);
-}
-
-static int
-perform_write(struct vmctx *vm, int vcpu, uint64_t gpa, uint64_t guest_cr3,
- const struct decoded_instruction *instruction, uint64_t operand,
- struct mem_range *mr)
-{
- enum vm_reg_name regname;
- uintptr_t target;
- int error;
- int size;
- uint64_t reg;
- uint8_t addressing_mode;
-
- if (instruction->opcode_flags & TO_RM) {
- reg = instruction->rm;
- addressing_mode = instruction->addressing_mode;
- } else if (instruction->opcode_flags & TO_REG) {
- reg = instruction->reg;
- addressing_mode = MOD_DIRECT;
- } else
- return (-1);
-
- /*
- * Determine the operand size. rex.w has priority
- */
- size = 4;
- if (instruction->rex_w) {
- size = 8;
- } else if (instruction->opcode_flags & TO_8) {
- size = 1;
- } else if (instruction->opsz) {
- size = 2;
- };
-
- switch(addressing_mode) {
- case MOD_DIRECT:
- regname = get_vm_reg_name(reg);
- error = vm_get_register(vm, vcpu, regname, &reg);
- if (error)
- return (error);
- operand = adjust_write(reg, operand, size);
-
- return (vm_set_register(vm, vcpu, regname, operand));
- case MOD_INDIRECT:
- case MOD_INDIRECT_DISP8:
- case MOD_INDIRECT_DISP32:
-#ifdef INSTR_VERIFY
- regname = get_vm_reg_name(reg);
- error = vm_get_register(vm, vcpu, regname, &reg);
- assert(!error);
- target = gla2gpa(reg, guest_cr3);
- target += instruction->disp;
- assert(gpa == target);
-#endif
- error = (*mr->handler)(vm, vcpu, MEM_F_WRITE, gpa, size,
- &operand, mr->arg1, mr->arg2);
- return (error);
- default:
- return (-1);
- }
-}
-
-static int
-emulate_decoded_instruction(struct vmctx *vm, int vcpu, uint64_t gpa,
- uint64_t cr3,
- const struct decoded_instruction *instruction,
- struct mem_range *mr)
-{
- uint64_t operand;
- int error;
-
- error = get_operand(vm, vcpu, gpa, cr3, instruction, &operand, mr);
- if (error)
- return (error);
-
- return perform_write(vm, vcpu, gpa, cr3, instruction, operand, mr);
-}
-
-int
-emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, uint64_t cr3,
- uint64_t gpa, int flags, struct mem_range *mr)
-{
- struct decoded_instruction instr;
- int error;
- void *instruction;
-
- instruction = gla2hla(rip, cr3);
-
- error = decode_instruction(instruction, &instr);
- if (!error)
- error = emulate_decoded_instruction(vm, vcpu, gpa, cr3,
- &instr, mr);
-
- return (error);
-}
diff --git a/usr.sbin/bhyve/instruction_emul.h b/usr.sbin/bhyve/instruction_emul.h
deleted file mode 100644
index ef85796..0000000
--- a/usr.sbin/bhyve/instruction_emul.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*-
- * Copyright (c) 2012 Sandvine, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _INSTRUCTION_EMUL_H_
-#define _INSTRUCTION_EMUL_H_
-
-int emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip,
- uint64_t cr3, uint64_t gpa, int flags,
- struct mem_range *mr);
-
-#endif
diff --git a/usr.sbin/bhyve/ioapic.c b/usr.sbin/bhyve/ioapic.c
index ea6e47c..47dd833 100644
--- a/usr.sbin/bhyve/ioapic.c
+++ b/usr.sbin/bhyve/ioapic.c
@@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$");
#include "inout.h"
#include "mem.h"
-#include "instruction_emul.h"
#include "fbsdrun.h"
#include <stdio.h>
diff --git a/usr.sbin/bhyve/mem.c b/usr.sbin/bhyve/mem.c
index deb91dc..dc43ff4 100644
--- a/usr.sbin/bhyve/mem.c
+++ b/usr.sbin/bhyve/mem.c
@@ -51,7 +51,6 @@ __FBSDID("$FreeBSD$");
#include <assert.h>
#include "mem.h"
-#include "instruction_emul.h"
struct mmio_rb_range {
RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */
@@ -134,33 +133,57 @@ mmio_rb_dump(void)
RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
+static int /* returns the range handler's result unchanged */
+mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg)
+{
+ int error;
+ struct mem_range *mr = arg; /* emulate_mem() supplies &entry->mr_param as arg */
+
+ error = (*mr->handler)(ctx, vcpu, MEM_F_READ, gpa, size, /* read-direction request */
+ rval, mr->arg1, mr->arg2); /* rval is handed to the handler as its data pointer */
+ return (error);
+}
+
+static int /* returns the range handler's result unchanged */
+mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg)
+{
+ int error;
+ struct mem_range *mr = arg; /* emulate_mem() supplies &entry->mr_param as arg */
+
+ error = (*mr->handler)(ctx, vcpu, MEM_F_WRITE, gpa, size, /* write-direction request */
+ &wval, mr->arg1, mr->arg2); /* handler takes a pointer, so pass the local copy's address */
+ return (error);
+}
+
int
emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, uint64_t rip,
- uint64_t cr3, int mode)
+ uint64_t cr3, int mode, struct vie *vie) /* rip, cr3 and mode are no longer forwarded; vie is */
{
struct mmio_rb_range *entry;
int err;
- err = 0;
-
/*
* First check the per-vCPU cache
*/
if (mmio_hint[vcpu] &&
paddr >= mmio_hint[vcpu]->mr_base &&
paddr <= mmio_hint[vcpu]->mr_end) {
- err = emulate_instruction(ctx, vcpu, rip, cr3, paddr, mode,
- &mmio_hint[vcpu]->mr_param);
- } else {
- if (mmio_rb_lookup(paddr, &entry)) {
- err = ENOENT;
- } else {
- mmio_hint[vcpu] = entry;
- err = emulate_instruction(ctx, vcpu, rip, cr3, paddr,
- mode, &entry->mr_param);
- }
+ entry = mmio_hint[vcpu]; /* cache hit: paddr lies within the hinted range */
+ } else
+ entry = NULL; /* cache miss: resolved by the lookup below */
+
+ if (entry == NULL) {
+ if (mmio_rb_lookup(paddr, &entry))
+ return (ESRCH); /* mmio_rb_lookup() returned non-zero */
+
+ /* Update the per-vCPU cache */
+ mmio_hint[vcpu] = entry;
}
+ assert(entry != NULL && entry == mmio_hint[vcpu]); /* both paths leave entry equal to the hint */
+
+ err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, /* uses the decoded vie from the vm exit */
+ mem_read, mem_write, &entry->mr_param); /* mr_param becomes the callbacks' arg */
return (err);
}
diff --git a/usr.sbin/bhyve/mem.h b/usr.sbin/bhyve/mem.h
index 53c4f72..b34c1fd 100644
--- a/usr.sbin/bhyve/mem.h
+++ b/usr.sbin/bhyve/mem.h
@@ -51,7 +51,7 @@ struct mem_range {
void init_mem(void);
int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, uint64_t rip,
- uint64_t cr3, int mode);
+ uint64_t cr3, int mode, struct vie *vie);
int register_mem(struct mem_range *memp);
diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c
index 7ac6e3d..28abb6b 100644
--- a/usr.sbin/bhyve/pci_passthru.c
+++ b/usr.sbin/bhyve/pci_passthru.c
@@ -48,7 +48,6 @@ __FBSDID("$FreeBSD$");
#include <vmmapi.h>
#include "pci_emul.h"
#include "mem.h"
-#include "instruction_emul.h"
#ifndef _PATH_DEVPCI
#define _PATH_DEVPCI "/dev/pci"
OpenPOWER on IntegriCloud