Diffstat (limited to 'sys/amd64/vmm/vmm_instruction_emul.c')
-rw-r--r--   sys/amd64/vmm/vmm_instruction_emul.c | 481
1 file changed, 432 insertions, 49 deletions
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 7ef4dbb..5e5399b 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -30,6 +30,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#ifdef _KERNEL
#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>
@@ -40,10 +41,60 @@ __FBSDID("$FreeBSD$");
#include <machine/pmap.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
+#else /* !_KERNEL */
+#include <sys/types.h>
+#include <sys/errno.h>
-#include "vmm_instruction_emul.h"
+#include <machine/vmm.h>
+
+#include <vmmapi.h>
+#endif /* _KERNEL */
+
+
+
+/* struct vie_op.op_type */
+enum {
+ VIE_OP_TYPE_NONE = 0,
+ VIE_OP_TYPE_MOV,
+ VIE_OP_TYPE_AND,
+ VIE_OP_TYPE_LAST
+};
+
+/* struct vie_op.op_flags */
+#define VIE_OP_F_IMM (1 << 0) /* immediate operand present */
+#define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */
+
+static const struct vie_op one_byte_opcodes[256] = {
+ [0x89] = {
+ .op_byte = 0x89,
+ .op_type = VIE_OP_TYPE_MOV,
+ },
+ [0x8B] = {
+ .op_byte = 0x8B,
+ .op_type = VIE_OP_TYPE_MOV,
+ },
+ [0xC7] = {
+ .op_byte = 0xC7,
+ .op_type = VIE_OP_TYPE_MOV,
+ .op_flags = VIE_OP_F_IMM,
+ },
+ [0x23] = {
+ .op_byte = 0x23,
+ .op_type = VIE_OP_TYPE_AND,
+ }
+};
+
+/* struct vie.mod */
+#define VIE_MOD_INDIRECT 0
+#define VIE_MOD_INDIRECT_DISP8 1
+#define VIE_MOD_INDIRECT_DISP32 2
+#define VIE_MOD_DIRECT 3
-#define GB (1024 * 1024 * 1024)
+/* struct vie.rm */
+#define VIE_RM_SIB 4
+#define VIE_RM_DISP32 5
+
+#define GB (1024 * 1024 * 1024)
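A minimal standalone sketch of the table-driven dispatch set up above (illustration only, not part of the change; the demo_* names are invented and the constants are stand-ins for the VIE_OP_* values): the primary opcode byte indexes the table, an op_type of VIE_OP_TYPE_NONE marks an unsupported instruction, and op_flags tells the decoder whether an immediate follows the ModRM/SIB/displacement bytes.

#include <stdio.h>
#include <stdint.h>

#define DEMO_TYPE_NONE	0
#define DEMO_TYPE_MOV	1
#define DEMO_F_IMM	(1 << 0)	/* 4-byte immediate follows ModRM */

struct demo_op {
	uint8_t		op_byte;
	uint8_t		op_type;
	uint16_t	op_flags;
};

/* indexed directly by the primary opcode byte, like one_byte_opcodes[] */
static const struct demo_op demo_ops[256] = {
	[0x8B] = { 0x8B, DEMO_TYPE_MOV, 0 },
	[0xC7] = { 0xC7, DEMO_TYPE_MOV, DEMO_F_IMM },
};

int
main(void)
{
	uint8_t byte = 0xC7;
	const struct demo_op *op = &demo_ops[byte];

	if (op->op_type == DEMO_TYPE_NONE) {
		printf("0x%02x: not emulated\n", byte);
		return (1);
	}
	printf("0x%02x: MOV, immediate bytes = %d\n", byte,
	    (op->op_flags & DEMO_F_IMM) ? 4 : 0);
	return (0);
}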
static enum vm_reg_name gpr_map[16] = {
VM_REG_GUEST_RAX,
@@ -64,17 +115,232 @@ static enum vm_reg_name gpr_map[16] = {
VM_REG_GUEST_R15
};
+static uint64_t size2mask[] = {
+ [1] = 0xff,
+ [2] = 0xffff,
+ [4] = 0xffffffff,
+ [8] = 0xffffffffffffffff,
+};
+
+static int
+vie_valid_register(enum vm_reg_name reg)
+{
+#ifdef _KERNEL
+ /*
+ * XXX
+ * The operand register in which we store the result of the
+ * read must be a GPR that we can modify even if the vcpu
+ * is "running". All the GPRs qualify except for %rsp.
+ *
+ * This is a limitation of the vm_set_register() API
+ * and can be fixed if necessary.
+ */
+ if (reg == VM_REG_GUEST_RSP)
+ return (0);
+#endif
+ return (1);
+}
+
+static int
+vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
+{
+ int error;
+
+ if (!vie_valid_register(reg))
+ return (EINVAL);
+
+ error = vm_get_register(vm, vcpuid, reg, rval);
+
+ return (error);
+}
+
+static int
+vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
+ uint64_t val, int size)
+{
+ int error;
+ uint64_t origval;
+
+ if (!vie_valid_register(reg))
+ return (EINVAL);
+
+ switch (size) {
+ case 1:
+ case 2:
+ error = vie_read_register(vm, vcpuid, reg, &origval);
+ if (error)
+ return (error);
+ val &= size2mask[size];
+ val |= origval & ~size2mask[size];
+ break;
+ case 4:
+ val &= 0xffffffffUL;
+ break;
+ case 8:
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ error = vm_set_register(vm, vcpuid, reg, val);
+ return (error);
+}
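The size-dependent merge above is what gives sub-register writes their x86-64 semantics. A minimal standalone sketch (illustration only; the masks are copied from size2mask[] and nothing from the patch is reused): a byte or word write preserves the untouched upper bits of the destination, a 32-bit write zero-extends, and a 64-bit write replaces the register outright.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

/* same masks as size2mask[] above */
static const uint64_t mask[9] = {
	[1] = 0xff, [2] = 0xffff, [4] = 0xffffffff, [8] = ~(uint64_t)0
};

/* mirror the size-dependent merge done by vie_update_register() */
static uint64_t
merge(uint64_t origval, uint64_t val, int size)
{
	if (size == 1 || size == 2)
		return ((val & mask[size]) | (origval & ~mask[size]));
	if (size == 4)
		return (val & 0xffffffffUL);	/* upper 32 bits are cleared */
	return (val);				/* size == 8: full overwrite */
}

int
main(void)
{
	uint64_t rax = 0x1122334455667788ULL;

	/* 16-bit write keeps the upper 48 bits: 112233445566abcd */
	printf("%016" PRIx64 "\n", merge(rax, 0xabcd, 2));
	/* 32-bit write zero-extends: 000000000000abcd */
	printf("%016" PRIx64 "\n", merge(rax, 0xabcd, 4));
	return (0);
}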
+
+/*
+ * The following simplifying assumptions are made during emulation:
+ *
+ * - guest is in 64-bit mode
+ * - default address size is 64-bits
+ * - default operand size is 32-bits
+ *
+ * - operand size override is not supported
+ *
+ * - address size override is not supported
+ */
+static int
+emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+ int error, size;
+ enum vm_reg_name reg;
+ uint64_t val;
+
+ size = 4;
+ error = EINVAL;
+
+ switch (vie->op.op_byte) {
+ case 0x89:
+ /*
+ * MOV from reg (ModRM:reg) to mem (ModRM:r/m)
+ * 89/r: mov r/m32, r32
+ * REX.W + 89/r mov r/m64, r64
+ */
+ if (vie->rex_w)
+ size = 8;
+ reg = gpr_map[vie->reg];
+ error = vie_read_register(vm, vcpuid, reg, &val);
+ if (error == 0) {
+ val &= size2mask[size];
+ error = memwrite(vm, vcpuid, gpa, val, size, arg);
+ }
+ break;
+ case 0x8B:
+ /*
+ * MOV from mem (ModRM:r/m) to reg (ModRM:reg)
+ * 8B/r: mov r32, r/m32
+ * REX.W 8B/r: mov r64, r/m64
+ */
+ if (vie->rex_w)
+ size = 8;
+ error = memread(vm, vcpuid, gpa, &val, size, arg);
+ if (error == 0) {
+ reg = gpr_map[vie->reg];
+ error = vie_update_register(vm, vcpuid, reg, val, size);
+ }
+ break;
+ case 0xC7:
+ /*
+ * MOV from imm32 to mem (ModRM:r/m)
+ * C7/0 mov r/m32, imm32
+ * REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits)
+ */
+ val = vie->immediate; /* already sign-extended */
+
+ if (vie->rex_w)
+ size = 8;
+
+ if (size != 8)
+ val &= size2mask[size];
+
+ error = memwrite(vm, vcpuid, gpa, val, size, arg);
+ break;
+ default:
+ break;
+ }
+
+ return (error);
+}
+
+static int
+emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+ int error, size;
+ enum vm_reg_name reg;
+ uint64_t val1, val2;
+
+ size = 4;
+ error = EINVAL;
+
+ switch (vie->op.op_byte) {
+ case 0x23:
+ /*
+ * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
+ * result in reg.
+ *
+ * 23/r and r32, r/m32
+ * REX.W + 23/r and r64, r/m64
+ */
+ if (vie->rex_w)
+ size = 8;
+
+ /* get the first operand */
+ reg = gpr_map[vie->reg];
+ error = vie_read_register(vm, vcpuid, reg, &val1);
+ if (error)
+ break;
+
+ /* get the second operand */
+ error = memread(vm, vcpuid, gpa, &val2, size, arg);
+ if (error)
+ break;
+
+ /* perform the operation and write the result */
+ val1 &= val2;
+ error = vie_update_register(vm, vcpuid, reg, val1, size);
+ break;
+ default:
+ break;
+ }
+ return (error);
+}
+
+int
+vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite,
+ void *memarg)
+{
+ int error;
+
+ if (!vie->decoded)
+ return (EINVAL);
+
+ switch (vie->op.op_type) {
+ case VIE_OP_TYPE_MOV:
+ error = emulate_mov(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
+ case VIE_OP_TYPE_AND:
+ error = emulate_and(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
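For orientation, a hedged sketch of how a userland caller might wire this up. It assumes the environment this change enables (<machine/vmm.h> providing struct vie, the mem_region_*_t typedefs and the vmm_emulate_instruction() prototype); the callback parameter lists are inferred from the memread/memwrite calls in emulate_mov() and emulate_and(), and the demo_* names plus the single fabricated 32-bit "device" register are invented for illustration.

#include <sys/types.h>
#include <machine/vmm.h>

static uint32_t demo_mmio_reg;		/* invented backing store */

static int
demo_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, int size,
    void *arg)
{
	*rval = demo_mmio_reg;		/* every read returns the register */
	return (0);
}

static int
demo_write(void *vm, int vcpuid, uint64_t gpa, uint64_t wval, int size,
    void *arg)
{
	demo_mmio_reg = (uint32_t)wval;	/* every write updates it */
	return (0);
}

/*
 * Called after a nested page table fault on the emulated range; 'vie' must
 * already have been filled in by vmm_fetch_instruction() and
 * vmm_decode_instruction().
 */
static int
demo_handle_mmio(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
	return (vmm_emulate_instruction(vm, vcpuid, gpa, vie,
	    demo_read, demo_write, NULL));
}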
+
+#ifdef _KERNEL
static void
vie_init(struct vie *vie)
{
bzero(vie, sizeof(struct vie));
- vie->op_size = VIE_OP_SIZE_32BIT;
-
vie->base_register = VM_REG_LAST;
vie->index_register = VM_REG_LAST;
- vie->operand_register = VM_REG_LAST;
}
static int
@@ -129,7 +395,7 @@ error:
}
int
-vmm_fetch_instruction(struct vm *vm, uint64_t rip, int inst_length,
+vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
uint64_t cr3, struct vie *vie)
{
int n, err;
@@ -172,6 +438,7 @@ vmm_fetch_instruction(struct vm *vm, uint64_t rip, int inst_length,
static int
vie_peek(struct vie *vie, uint8_t *x)
{
+
if (vie->num_processed < vie->num_valid) {
*x = vie->inst[vie->num_processed];
return (0);
@@ -182,8 +449,6 @@ vie_peek(struct vie *vie, uint8_t *x)
static void
vie_advance(struct vie *vie)
{
- if (vie->num_processed >= vie->num_valid)
- panic("vie_advance: %d/%d", vie->num_processed, vie->num_valid);
vie->num_processed++;
}
@@ -213,24 +478,16 @@ decode_opcode(struct vie *vie)
{
uint8_t x;
- static const uint8_t flags[256] = {
- [0x89] = VIE_F_HAS_MODRM | VIE_F_FROM_REG | VIE_F_TO_RM,
- [0x8B] = VIE_F_HAS_MODRM | VIE_F_FROM_RM | VIE_F_TO_REG,
- [0xC7] = VIE_F_HAS_MODRM | VIE_F_FROM_IMM | VIE_F_TO_RM,
- };
-
if (vie_peek(vie, &x))
return (-1);
- vie->opcode_byte = x;
- vie->opcode_flags = flags[x];
+ vie->op = one_byte_opcodes[x];
- vie_advance(vie);
-
- if (vie->opcode_flags == 0)
+ if (vie->op.op_type == VIE_OP_TYPE_NONE)
return (-1);
- else
- return (0);
+
+ vie_advance(vie);
+ return (0);
}
/*
@@ -241,9 +498,6 @@ decode_modrm(struct vie *vie)
{
uint8_t x;
- if ((vie->opcode_flags & VIE_F_HAS_MODRM) == 0)
- return (0);
-
if (vie_peek(vie, &x))
return (-1);
@@ -251,35 +505,40 @@ decode_modrm(struct vie *vie)
vie->rm = (x >> 0) & 0x7;
vie->reg = (x >> 3) & 0x7;
+ /*
+ * A direct addressing mode makes no sense in the context of an EPT
+ * fault. There has to be a memory access involved to cause the
+ * EPT fault.
+ */
+ if (vie->mod == VIE_MOD_DIRECT)
+ return (-1);
+
if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
(vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
- /*
- * Table 2-5: Special Cases of REX Encodings
- *
- * mod=0, r/m=5 is used in the compatibility mode to
- * indicate a disp32 without a base register.
- *
- * mod!=3, r/m=4 is used in the compatibility mode to
- * indicate that the SIB byte is present.
- *
- * The 'b' bit in the REX prefix is don't care in
- * this case.
- */
+ /*
+ * Table 2-5: Special Cases of REX Encodings
+ *
+ * mod=0, r/m=5 is used in the compatibility mode to
+ * indicate a disp32 without a base register.
+ *
+ * mod!=3, r/m=4 is used in the compatibility mode to
+ * indicate that the SIB byte is present.
+ *
+ * The 'b' bit in the REX prefix is don't care in
+ * this case.
+ */
} else {
vie->rm |= (vie->rex_b << 3);
}
vie->reg |= (vie->rex_r << 3);
- /* SIB addressing not supported yet */
+ /* SIB */
if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
- return (-1);
+ goto done;
vie->base_register = gpr_map[vie->rm];
- if (vie->opcode_flags & (VIE_F_FROM_REG | VIE_F_TO_REG))
- vie->operand_register = gpr_map[vie->reg];
-
switch (vie->mod) {
case VIE_MOD_INDIRECT_DISP8:
vie->disp_bytes = 1;
@@ -295,12 +554,76 @@ decode_modrm(struct vie *vie)
break;
}
- /* calculate the operand size */
- if (vie->rex_w)
- vie->op_size = VIE_OP_SIZE_64BIT;
-
- if (vie->opcode_flags & VIE_F_FROM_IMM)
+ /* Figure out immediate operand size (if any) */
+ if (vie->op.op_flags & VIE_OP_F_IMM)
vie->imm_bytes = 4;
+ else if (vie->op.op_flags & VIE_OP_F_IMM8)
+ vie->imm_bytes = 1;
+
+done:
+ vie_advance(vie);
+
+ return (0);
+}
+
+static int
+decode_sib(struct vie *vie)
+{
+ uint8_t x;
+
+ /* Proceed only if SIB byte is present */
+ if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
+ return (0);
+
+ if (vie_peek(vie, &x))
+ return (-1);
+
+ /* De-construct the SIB byte */
+ vie->ss = (x >> 6) & 0x3;
+ vie->index = (x >> 3) & 0x7;
+ vie->base = (x >> 0) & 0x7;
+
+ /* Apply the REX prefix modifiers */
+ vie->index |= vie->rex_x << 3;
+ vie->base |= vie->rex_b << 3;
+
+ switch (vie->mod) {
+ case VIE_MOD_INDIRECT_DISP8:
+ vie->disp_bytes = 1;
+ break;
+ case VIE_MOD_INDIRECT_DISP32:
+ vie->disp_bytes = 4;
+ break;
+ }
+
+ if (vie->mod == VIE_MOD_INDIRECT &&
+ (vie->base == 5 || vie->base == 13)) {
+ /*
+ * Special case when base register is unused if mod = 0
+ * and base = %rbp or %r13.
+ *
+ * Documented in:
+ * Table 2-3: 32-bit Addressing Forms with the SIB Byte
+ * Table 2-5: Special Cases of REX Encodings
+ */
+ vie->disp_bytes = 4;
+ } else {
+ vie->base_register = gpr_map[vie->base];
+ }
+
+ /*
+ * All encodings of 'index' are valid except for %rsp (4).
+ *
+ * Documented in:
+ * Table 2-3: 32-bit Addressing Forms with the SIB Byte
+ * Table 2-5: Special Cases of REX Encodings
+ */
+ if (vie->index != 4)
+ vie->index_register = gpr_map[vie->index];
+
+ /* 'scale' makes sense only in the context of an index register */
+ if (vie->index_register < VM_REG_LAST)
+ vie->scale = 1 << vie->ss;
vie_advance(vie);
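The decode path is easier to follow with a concrete encoding. The standalone sketch below (illustration only, not part of the change; it uses ad-hoc locals instead of struct vie) pulls apart 48 8B 4C 98 10, i.e. mov rcx, [rax + rbx*4 + 0x10], using the same shifts and REX fix-ups as decode_modrm() and decode_sib().

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	/* REX.W + 8B /r with SIB and disp8: mov rcx, [rax + rbx*4 + 0x10] */
	uint8_t inst[] = { 0x48, 0x8b, 0x4c, 0x98, 0x10 };

	uint8_t rex = inst[0];
	int rex_w = (rex >> 3) & 1, rex_r = (rex >> 2) & 1;
	int rex_x = (rex >> 1) & 1, rex_b = (rex >> 0) & 1;

	uint8_t modrm = inst[2];
	int mod = (modrm >> 6) & 0x3;			/* 1 -> disp8 follows */
	int reg = ((modrm >> 3) & 0x7) | (rex_r << 3);	/* 1 -> %rcx */
	int rm  = (modrm >> 0) & 0x7;			/* 4 -> SIB byte follows */

	uint8_t sib = inst[3];
	int ss    = (sib >> 6) & 0x3;			/* scale = 1 << ss = 4 */
	int index = ((sib >> 3) & 0x7) | (rex_x << 3);	/* 3 -> %rbx */
	int base  = ((sib >> 0) & 0x7) | (rex_b << 3);	/* 0 -> %rax */

	int8_t disp8 = (int8_t)inst[4];			/* sign-extended, 0x10 */

	printf("rex_w=%d opcode=0x%02x mod=%d reg=%d rm=%d\n",
	    rex_w, inst[1], mod, reg, rm);
	printf("scale=%d index=%d base=%d disp=%d\n",
	    1 << ss, index, base, disp8);
	return (0);
}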
@@ -348,13 +671,14 @@ decode_immediate(struct vie *vie)
uint8_t x;
union {
char buf[4];
+ int8_t signed8;
int32_t signed32;
} u;
if ((n = vie->imm_bytes) == 0)
return (0);
- if (n != 4)
+ if (n != 1 && n != 4)
panic("decode_immediate: invalid imm_bytes %d", n);
for (i = 0; i < n; i++) {
@@ -365,14 +689,62 @@ decode_immediate(struct vie *vie)
vie_advance(vie);
}
- vie->immediate = u.signed32; /* sign-extended */
+ if (n == 1)
+ vie->immediate = u.signed8; /* sign-extended */
+ else
+ vie->immediate = u.signed32; /* sign-extended */
return (0);
}
+#define VERIFY_GLA
+/*
+ * Verify that the 'guest linear address' provided as collateral of the nested
+ * page table fault matches with our instruction decoding.
+ */
+#ifdef VERIFY_GLA
+static int
+verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
+{
+ int error;
+ uint64_t base, idx;
+
+ base = 0;
+ if (vie->base_register != VM_REG_LAST) {
+ error = vm_get_register(vm, cpuid, vie->base_register, &base);
+ if (error) {
+ printf("verify_gla: error %d getting base reg %d\n",
+ error, vie->base_register);
+ return (-1);
+ }
+ }
+
+ idx = 0;
+ if (vie->index_register != VM_REG_LAST) {
+ error = vm_get_register(vm, cpuid, vie->index_register, &idx);
+ if (error) {
+ printf("verify_gla: error %d getting index reg %d\n",
+ error, vie->index_register);
+ return (-1);
+ }
+ }
+
+ if (base + vie->scale * idx + vie->displacement != gla) {
+ printf("verify_gla mismatch: "
+ "base(0x%0lx), scale(%d), index(0x%0lx), "
+ "disp(0x%0lx), gla(0x%0lx)\n",
+ base, vie->scale, idx, vie->displacement, gla);
+ return (-1);
+ }
+
+ return (0);
+}
+#endif /* VERIFY_GLA */
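Worked example of the check verify_gla() performs, using hypothetical register values: for the mov rcx, [rax + rbx*4 + 0x10] encoding decoded above, with guest %rax = 0x1000 and %rbx = 0x20, the decoder predicts an access at base + scale * index + displacement = 0x1000 + 4 * 0x20 + 0x10 = 0x1090. That must equal the guest linear address delivered with the nested page table fault; any mismatch means the decode disagrees with what the CPU actually executed, so the routine returns -1 and the emulation is abandoned.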
+
int
-vmm_decode_instruction(struct vie *vie)
+vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
{
+
if (decode_rex(vie))
return (-1);
@@ -382,11 +754,22 @@ vmm_decode_instruction(struct vie *vie)
if (decode_modrm(vie))
return (-1);
+ if (decode_sib(vie))
+ return (-1);
+
if (decode_displacement(vie))
return (-1);
if (decode_immediate(vie))
return (-1);
+#ifdef VERIFY_GLA
+ if (verify_gla(vm, cpuid, gla, vie))
+ return (-1);
+#endif
+
+ vie->decoded = 1; /* success */
+
return (0);
}
+#endif /* _KERNEL */