Diffstat (limited to 'sys/amd64/vmm/vmm_instruction_emul.c')
-rw-r--r-- | sys/amd64/vmm/vmm_instruction_emul.c | 672 |
1 files changed, 528 insertions, 144 deletions
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 921deb5..a65b125 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #else	/* !_KERNEL */
 #include <sys/types.h>
 #include <sys/errno.h>
+#include <sys/_iovec.h>
 
 #include <machine/vmm.h>
 
@@ -65,18 +66,26 @@ enum {
 	VIE_OP_TYPE_AND,
 	VIE_OP_TYPE_OR,
 	VIE_OP_TYPE_TWO_BYTE,
+	VIE_OP_TYPE_PUSH,
+	VIE_OP_TYPE_CMP,
 	VIE_OP_TYPE_LAST
 };
 
 /* struct vie_op.op_flags */
-#define	VIE_OP_F_IMM		(1 << 0)	/* immediate operand present */
-#define	VIE_OP_F_IMM8		(1 << 1)	/* 8-bit immediate operand */
+#define	VIE_OP_F_IMM		(1 << 0)  /* 16/32-bit immediate operand */
+#define	VIE_OP_F_IMM8		(1 << 1)  /* 8-bit immediate operand */
+#define	VIE_OP_F_MOFFSET	(1 << 2)  /* 16/32/64-bit immediate moffset */
+#define	VIE_OP_F_NO_MODRM	(1 << 3)
 
 static const struct vie_op two_byte_opcodes[256] = {
 	[0xB6] = {
 		.op_byte = 0xB6,
 		.op_type = VIE_OP_TYPE_MOVZX,
 	},
+	[0xB7] = {
+		.op_byte = 0xB7,
+		.op_type = VIE_OP_TYPE_MOVZX,
+	},
 	[0xBE] = {
 		.op_byte = 0xBE,
 		.op_type = VIE_OP_TYPE_MOVSX,
@@ -88,6 +97,10 @@ static const struct vie_op one_byte_opcodes[256] = {
 		.op_byte = 0x0F,
 		.op_type = VIE_OP_TYPE_TWO_BYTE
 	},
+	[0x3B] = {
+		.op_byte = 0x3B,
+		.op_type = VIE_OP_TYPE_CMP,
+	},
 	[0x88] = {
 		.op_byte = 0x88,
 		.op_type = VIE_OP_TYPE_MOV,
@@ -104,6 +117,22 @@ static const struct vie_op one_byte_opcodes[256] = {
 		.op_byte = 0x8B,
 		.op_type = VIE_OP_TYPE_MOV,
 	},
+	[0xA1] = {
+		.op_byte = 0xA1,
+		.op_type = VIE_OP_TYPE_MOV,
+		.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
+	},
+	[0xA3] = {
+		.op_byte = 0xA3,
+		.op_type = VIE_OP_TYPE_MOV,
+		.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
+	},
+	[0xC6] = {
+		/* XXX Group 11 extended opcode - not just MOV */
+		.op_byte = 0xC6,
+		.op_type = VIE_OP_TYPE_MOV,
+		.op_flags = VIE_OP_F_IMM8,
+	},
 	[0xC7] = {
 		.op_byte = 0xC7,
 		.op_type = VIE_OP_TYPE_MOV,
@@ -125,6 +154,11 @@ static const struct vie_op one_byte_opcodes[256] = {
 		.op_type = VIE_OP_TYPE_OR,
 		.op_flags = VIE_OP_F_IMM8,
 	},
+	[0xFF] = {
+		/* XXX Group 5 extended opcode - not just PUSH */
+		.op_byte = 0xFF,
+		.op_type = VIE_OP_TYPE_PUSH,
+	}
 };
 
 /* struct vie.mod */
@@ -175,18 +209,15 @@ vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
 	return (error);
 }
 
-static int
-vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
+static void
+vie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr)
 {
-	uint64_t val;
-	int error, rshift;
-	enum vm_reg_name reg;
-
-	rshift = 0;
-	reg = gpr_map[vie->reg];
+	*lhbr = 0;
+	*reg = gpr_map[vie->reg];
 
 	/*
-	 * 64-bit mode imposes limitations on accessing legacy byte registers.
+	 * 64-bit mode imposes limitations on accessing legacy high byte
+	 * registers (lhbr).
 	 *
 	 * The legacy high-byte registers cannot be addressed if the REX
 	 * prefix is present. In this case the values 4, 5, 6 and 7 of the
@@ -198,17 +229,56 @@ vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
 	 */
 	if (!vie->rex_present) {
 		if (vie->reg & 0x4) {
-			/*
-			 * Obtain the value of %ah by reading %rax and shifting
-			 * right by 8 bits (same for %bh, %ch and %dh).
-			 */
-			rshift = 8;
-			reg = gpr_map[vie->reg & 0x3];
+			*lhbr = 1;
+			*reg = gpr_map[vie->reg & 0x3];
 		}
 	}
+}
+
+static int
+vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
+{
+	uint64_t val;
+	int error, lhbr;
+	enum vm_reg_name reg;
+
+	vie_calc_bytereg(vie, &reg, &lhbr);
 	error = vm_get_register(vm, vcpuid, reg, &val);
-	*rval = val >> rshift;
+
+	/*
+	 * To obtain the value of a legacy high byte register shift the
+	 * base register right by 8 bits (%ah = %rax >> 8).
+	 */
+	if (lhbr)
+		*rval = val >> 8;
+	else
+		*rval = val;
+	return (error);
+}
+
+static int
+vie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte)
+{
+	uint64_t origval, val, mask;
+	int error, lhbr;
+	enum vm_reg_name reg;
+
+	vie_calc_bytereg(vie, &reg, &lhbr);
+	error = vm_get_register(vm, vcpuid, reg, &origval);
+	if (error == 0) {
+		val = byte;
+		mask = 0xff;
+		if (lhbr) {
+			/*
+			 * Shift left by 8 to store 'byte' in a legacy high
+			 * byte register.
+			 */
+			val <<= 8;
+			mask <<= 8;
+		}
+		val |= origval & ~mask;
+		error = vm_set_register(vm, vcpuid, reg, val);
+	}
 	return (error);
 }
 
@@ -242,16 +312,52 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
 }
 
 /*
- * The following simplifying assumptions are made during emulation:
- *
- * - guest is in 64-bit mode
- * - default address size is 64-bits
- * - default operand size is 32-bits
- *
- * - operand size override is not supported
- *
- * - address size override is not supported
+ * Return the status flags that would result from doing (x - y).
  */
+static u_long
+getcc16(uint16_t x, uint16_t y)
+{
+	u_long rflags;
+
+	__asm __volatile("sub %1,%2; pushfq; popq %0" :
+	    "=r" (rflags) : "m" (y), "r" (x));
+	return (rflags);
+}
+
+static u_long
+getcc32(uint32_t x, uint32_t y)
+{
+	u_long rflags;
+
+	__asm __volatile("sub %1,%2; pushfq; popq %0" :
+	    "=r" (rflags) : "m" (y), "r" (x));
+	return (rflags);
+}
+
+static u_long
+getcc64(uint64_t x, uint64_t y)
+{
+	u_long rflags;
+
+	__asm __volatile("sub %1,%2; pushfq; popq %0" :
+	    "=r" (rflags) : "m" (y), "r" (x));
+	return (rflags);
+}
+
+static u_long
+getcc(int opsize, uint64_t x, uint64_t y)
+{
+	KASSERT(opsize == 2 || opsize == 4 || opsize == 8,
+	    ("getcc: invalid operand size %d", opsize));
+
+	if (opsize == 2)
+		return (getcc16(x, y));
+	else if (opsize == 4)
+		return (getcc32(x, y));
+	else
+		return (getcc64(x, y));
+}
+
 static int
 emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
@@ -261,7 +367,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 	uint8_t byte;
 	uint64_t val;
 
-	size = 4;
+	size = vie->opsize;
 	error = EINVAL;
 
 	switch (vie->op.op_byte) {
@@ -271,7 +377,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 		 * 88/r:	mov r/m8, r8
 		 * REX + 88/r:	mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
 		 */
-		size = 1;
+		size = 1;	/* override for byte operation */
 		error = vie_read_bytereg(vm, vcpuid, vie, &byte);
 		if (error == 0)
 			error = memwrite(vm, vcpuid, gpa, byte, size, arg);
@@ -279,11 +385,10 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 	case 0x89:
 		/*
 		 * MOV from reg (ModRM:reg) to mem (ModRM:r/m)
+		 * 89/r:	mov r/m16, r16
 		 * 89/r:	mov r/m32, r32
 		 * REX.W + 89/r	mov r/m64, r64
 		 */
-		if (vie->rex_w)
-			size = 8;
 		reg = gpr_map[vie->reg];
 		error = vie_read_register(vm, vcpuid, reg, &val);
 		if (error == 0) {
@@ -292,38 +397,72 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 		}
 		break;
 	case 0x8A:
+		/*
+		 * MOV byte from mem (ModRM:r/m) to reg (ModRM:reg)
+		 * 8A/r:	mov r8, r/m8
+		 * REX + 8A/r:	mov r8, r/m8
+		 */
+		size = 1;	/* override for byte operation */
+		error = memread(vm, vcpuid, gpa, &val, size, arg);
+		if (error == 0)
+			error = vie_write_bytereg(vm, vcpuid, vie, val);
+		break;
 	case 0x8B:
 		/*
 		 * MOV from mem (ModRM:r/m) to reg (ModRM:reg)
-		 * 8A/r:	mov r/m8, r8
-		 * REX + 8A/r:	mov r/m8, r8
+		 * 8B/r:	mov r16, r/m16
 		 * 8B/r:	mov r32, r/m32
 		 * REX.W 8B/r:	mov r64, r/m64
 		 */
-		if (vie->op.op_byte == 0x8A)
-			size = 1;
-		else if (vie->rex_w)
-			size = 8;
 		error = memread(vm, vcpuid, gpa, &val, size, arg);
 		if (error == 0) {
 			reg = gpr_map[vie->reg];
 			error = vie_update_register(vm, vcpuid, reg, val, size);
 		}
 		break;
+	case 0xA1:
+		/*
+		 * MOV from seg:moffset to AX/EAX/RAX
+		 * A1:		mov AX, moffs16
+		 * A1:		mov EAX, moffs32
+		 * REX.W + A1:	mov RAX, moffs64
+		 */
+		error = memread(vm, vcpuid, gpa, &val, size, arg);
+		if (error == 0) {
+			reg = VM_REG_GUEST_RAX;
+			error = vie_update_register(vm, vcpuid, reg, val, size);
+		}
+		break;
+	case 0xA3:
+		/*
+		 * MOV from AX/EAX/RAX to seg:moffset
+		 * A3:		mov moffs16, AX
+		 * A3:		mov moffs32, EAX
+		 * REX.W + A3:	mov moffs64, RAX
+		 */
+		error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
+		if (error == 0) {
+			val &= size2mask[size];
+			error = memwrite(vm, vcpuid, gpa, val, size, arg);
+		}
+		break;
+	case 0xC6:
+		/*
+		 * MOV from imm8 to mem (ModRM:r/m)
+		 * C6/0		mov r/m8, imm8
+		 * REX + C6/0	mov r/m8, imm8
+		 */
+		size = 1;	/* override for byte operation */
+		error = memwrite(vm, vcpuid, gpa, vie->immediate, size, arg);
+		break;
 	case 0xC7:
 		/*
-		 * MOV from imm32 to mem (ModRM:r/m)
+		 * MOV from imm16/imm32 to mem (ModRM:r/m)
+		 * C7/0		mov r/m16, imm16
 		 * C7/0		mov r/m32, imm32
 		 * REX.W + C7/0	mov r/m64, imm32 (sign-extended to 64-bits)
 		 */
-		val = vie->immediate;	/* already sign-extended */
-
-		if (vie->rex_w)
-			size = 8;
-
-		if (size != 8)
-			val &= size2mask[size];
-
+		val = vie->immediate & size2mask[size];
 		error = memwrite(vm, vcpuid, gpa, val, size, arg);
 		break;
 	default:
@@ -333,17 +472,6 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 	return (error);
 }
 
-/*
- * The following simplifying assumptions are made during emulation:
- *
- * - guest is in 64-bit mode
- * - default address size is 64-bits
- * - default operand size is 32-bits
- *
- * - operand size override is not supported
- *
- * - address size override is not supported
- */
 static int
 emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 	     mem_region_read_t memread, mem_region_write_t memwrite,
@@ -353,7 +481,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 	enum vm_reg_name reg;
 	uint64_t val;
 
-	size = 4;
+	size = vie->opsize;
 	error = EINVAL;
 
 	switch (vie->op.op_byte) {
@@ -362,8 +490,9 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 		 * MOV and zero extend byte from mem (ModRM:r/m) to
 		 * reg (ModRM:reg).
 		 *
-		 * 0F B6/r		movzx r/m8, r32
-		 * REX.W + 0F B6/r	movzx r/m8, r64
+		 * 0F B6/r		movzx r16, r/m8
+		 * 0F B6/r		movzx r32, r/m8
+		 * REX.W + 0F B6/r	movzx r64, r/m8
 		 */
 
 		/* get the first operand */
@@ -374,19 +503,39 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 		/* get the second operand */
 		reg = gpr_map[vie->reg];
 
-		if (vie->rex_w)
-			size = 8;
+		/* zero-extend byte */
+		val = (uint8_t)val;
 
 		/* write the result */
 		error = vie_update_register(vm, vcpuid, reg, val, size);
 		break;
+	case 0xB7:
+		/*
+		 * MOV and zero extend word from mem (ModRM:r/m) to
+		 * reg (ModRM:reg).
+		 *
+		 * 0F B7/r		movzx r32, r/m16
+		 * REX.W + 0F B7/r	movzx r64, r/m16
+		 */
+		error = memread(vm, vcpuid, gpa, &val, 2, arg);
+		if (error)
+			return (error);
+
+		reg = gpr_map[vie->reg];
+
+		/* zero-extend word */
+		val = (uint16_t)val;
+
+		error = vie_update_register(vm, vcpuid, reg, val, size);
+		break;
 	case 0xBE:
 		/*
 		 * MOV and sign extend byte from mem (ModRM:r/m) to
 		 * reg (ModRM:reg).
 		 *
-		 * 0F BE/r		movsx r/m8, r32
-		 * REX.W + 0F BE/r	movsx r/m8, r64
+		 * 0F BE/r		movsx r16, r/m8
+		 * 0F BE/r		movsx r32, r/m8
+		 * REX.W + 0F BE/r	movsx r64, r/m8
 		 */
 
 		/* get the first operand */
@@ -397,9 +546,6 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 		/* get the second operand */
 		reg = gpr_map[vie->reg];
 
-		if (vie->rex_w)
-			size = 8;
-
 		/* sign extend byte */
 		val = (int8_t)val;
 
@@ -420,7 +566,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 	enum vm_reg_name reg;
 	uint64_t val1, val2;
 
-	size = 4;
+	size = vie->opsize;
 	error = EINVAL;
 
 	switch (vie->op.op_byte) {
@@ -429,11 +575,10 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 		 * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
 		 * result in reg.
 		 *
+		 * 23/r		and r16, r/m16
 		 * 23/r		and r32, r/m32
 		 * REX.W + 23/r	and r64, r/m64
 		 */
-		if (vie->rex_w)
-			size = 8;
 
 		/* get the first operand */
 		reg = gpr_map[vie->reg];
@@ -455,8 +600,9 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 		 * AND mem (ModRM:r/m) with immediate and store the
 		 * result in mem.
 		 *
-		 * 81/		and r/m32, imm32
-		 * REX.W + 81/	and r/m64, imm32 sign-extended to 64
+		 * 81 /4		and r/m16, imm16
+		 * 81 /4		and r/m32, imm32
+		 * REX.W + 81 /4	and r/m64, imm32 sign-extended to 64
 		 *
 		 * Currently, only the AND operation of the 0x81 opcode
 		 * is implemented (ModRM:reg = b100).
@@ -464,9 +610,6 @@
 		if ((vie->reg & 7) != 4)
 			break;
 
-		if (vie->rex_w)
-			size = 8;
-
 		/* get the first operand */
 		error = memread(vm, vcpuid, gpa, &val1, size, arg);
 		if (error)
@@ -492,7 +635,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 	int error, size;
 	uint64_t val1;
 
-	size = 4;
+	size = vie->opsize;
 	error = EINVAL;
 
 	switch (vie->op.op_byte) {
@@ -501,8 +644,9 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 		 * OR mem (ModRM:r/m) with immediate and store the
 		 * result in mem.
 		 *
-		 * 83/		OR r/m32, imm8 sign-extended to 32
-		 * REX.W + 83/	OR r/m64, imm8 sign-extended to 64
+		 * 83 /1		OR r/m16, imm8 sign-extended to 16
+		 * 83 /1		OR r/m32, imm8 sign-extended to 32
+		 * REX.W + 83/1		OR r/m64, imm8 sign-extended to 64
 		 *
 		 * Currently, only the OR operation of the 0x83 opcode
 		 * is implemented (ModRM:reg = b001).
@@ -510,9 +654,6 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 		if ((vie->reg & 7) != 1)
 			break;
 
-		if (vie->rex_w)
-			size = 8;
-
 		/* get the first operand */
 		error = memread(vm, vcpuid, gpa, &val1, size, arg);
 		if (error)
@@ -531,10 +672,167 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 	return (error);
 }
 
+#define	RFLAGS_STATUS_BITS	(PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
+
+static int
+emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+	int error, size;
+	uint64_t op1, op2, rflags, rflags2;
+	enum vm_reg_name reg;
+
+	size = vie->opsize;
+	switch (vie->op.op_byte) {
+	case 0x3B:
+		/*
+		 * 3B/r		CMP r16, r/m16
+		 * 3B/r		CMP r32, r/m32
+		 * REX.W + 3B/r	CMP r64, r/m64
+		 *
+		 * Compare first operand (reg) with second operand (r/m) and
+		 * set status flags in EFLAGS register. The comparison is
+		 * performed by subtracting the second operand from the first
+		 * operand and then setting the status flags.
+		 */
+
+		/* Get the first operand */
+		reg = gpr_map[vie->reg];
+		error = vie_read_register(vm, vcpuid, reg, &op1);
+		if (error)
+			return (error);
+
+		/* Get the second operand */
+		error = memread(vm, vcpuid, gpa, &op2, size, arg);
+		if (error)
+			return (error);
+
+		break;
+	default:
+		return (EINVAL);
+	}
+	rflags2 = getcc(size, op1, op2);
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+	if (error)
+		return (error);
+	rflags &= ~RFLAGS_STATUS_BITS;
+	rflags |= rflags2 & RFLAGS_STATUS_BITS;
+
+	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
+	return (error);
+}
+
+static int
+emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *arg)
+{
+#ifdef _KERNEL
+	struct vm_copyinfo copyinfo[2];
+#else
+	struct iovec copyinfo[2];
+#endif
+	struct seg_desc ss_desc;
+	uint64_t cr0, rflags, rsp, stack_gla, val;
+	int error, size, stackaddrsize;
+
+	/*
+	 * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
+	 *
+	 * PUSH is part of the group 5 extended opcodes and is identified
+	 * by ModRM:reg = b110.
+	 */
+	if ((vie->reg & 7) != 6)
+		return (EINVAL);
+
+	size = vie->opsize;
+	/*
+	 * From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1
+	 */
+	if (paging->cpu_mode == CPU_MODE_REAL) {
+		stackaddrsize = 2;
+	} else if (paging->cpu_mode == CPU_MODE_64BIT) {
+		/*
+		 * "Stack Manipulation Instructions in 64-bit Mode", SDM, Vol 3
+		 * - Stack pointer size is always 64-bits.
+		 * - PUSH/POP of 32-bit values is not possible in 64-bit mode.
+		 * - 16-bit PUSH/POP is supported by using the operand size
+		 *   override prefix (66H).
+		 */
+		stackaddrsize = 8;
+		size = vie->opsize_override ? 2 : 8;
+	} else {
+		/*
+		 * In protected or compability mode the 'B' flag in the
+		 * stack-segment descriptor determines the size of the
+		 * stack pointer.
+		 */
+		error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_SS, &ss_desc);
+		KASSERT(error == 0, ("%s: error %d getting SS descriptor",
+		    __func__, error));
+		if (SEG_DESC_DEF32(ss_desc.access))
+			stackaddrsize = 4;
+		else
+			stackaddrsize = 2;
+	}
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
+	KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp);
+	KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error));
+
+	rsp -= size;
+	if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc,
+	    rsp, size, stackaddrsize, PROT_WRITE, &stack_gla)) {
+		vm_inject_ss(vm, vcpuid, 0);
+		return (0);
+	}
+
+	if (vie_canonical_check(paging->cpu_mode, stack_gla)) {
+		vm_inject_ss(vm, vcpuid, 0);
+		return (0);
+	}
+
+	if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) {
+		vm_inject_ac(vm, vcpuid, 0);
+		return (0);
+	}
+
+	error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size, PROT_WRITE,
+	    copyinfo, nitems(copyinfo));
+	if (error == -1) {
+		/*
+		 * XXX cannot return a negative error value here because it
+		 * ends up being the return value of the VM_RUN() ioctl and
+		 * is interpreted as a pseudo-error (for e.g. ERESTART).
+		 */
+		return (EFAULT);
+	} else if (error == 1) {
+		/* Resume guest execution to handle page fault */
+		return (0);
+	}
+
+	error = memread(vm, vcpuid, mmio_gpa, &val, size, arg);
+	if (error == 0) {
+		vm_copyout(vm, vcpuid, &val, copyinfo, size);
+		error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp,
+		    stackaddrsize);
+		KASSERT(error == 0, ("error %d updating rsp", error));
+	}
+#ifdef _KERNEL
+	vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+#endif
+	return (error);
+}
+
 int
 vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
-			mem_region_read_t memread, mem_region_write_t memwrite,
-			void *memarg)
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *memarg)
 {
 	int error;
 
@@ -542,6 +840,14 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
 		return (EINVAL);
 
 	switch (vie->op.op_type) {
+	case VIE_OP_TYPE_PUSH:
+		error = emulate_push(vm, vcpuid, gpa, vie, paging, memread,
+		    memwrite, memarg);
+		break;
+	case VIE_OP_TYPE_CMP:
+		error = emulate_cmp(vm, vcpuid, gpa, vie,
+		    memread, memwrite, memarg);
+		break;
 	case VIE_OP_TYPE_MOV:
 		error = emulate_mov(vm, vcpuid, gpa, vie,
 		    memread, memwrite, memarg);
@@ -636,7 +942,7 @@ vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
 	 * then the descriptor is unusable and attempting to use
 	 * it results in a #GP(0).
 	 */
-	if (SEG_DESC_UNUSABLE(desc))
+	if (SEG_DESC_UNUSABLE(desc->access))
 		return (-1);
 
 	/*
@@ -645,13 +951,13 @@ vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
 	 * descriptor that is not present. If this was the case then
 	 * it would have been checked before the VM-exit.
 	 */
-	KASSERT(SEG_DESC_PRESENT(desc), ("segment %d not present: %#x",
-	    seg, desc->access));
+	KASSERT(SEG_DESC_PRESENT(desc->access),
+	    ("segment %d not present: %#x", seg, desc->access));
 
 	/*
 	 * The descriptor type must indicate a code/data segment.
 	 */
-	type = SEG_DESC_TYPE(desc);
+	type = SEG_DESC_TYPE(desc->access);
 	KASSERT(type >= 16 && type <= 31, ("segment %d has invalid "
 	    "descriptor type %#x", seg, type));
 
@@ -680,7 +986,8 @@ vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
 	if ((type & 0xC) == 0x4) {
 		/* expand-down data segment */
 		low_limit = desc->limit + 1;
-		high_limit = SEG_DESC_DEF32(desc) ? 0xffffffff : 0xffff;
+		high_limit = SEG_DESC_DEF32(desc->access) ?
+		    0xffffffff : 0xffff;
 	} else {
 		/* code segment or expand-up data segment */
 		low_limit = 0;
@@ -947,45 +1254,24 @@ fault:
 }
 
 int
-vmm_fetch_instruction(struct vm *vm, int cpuid, struct vm_guest_paging *paging,
+vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
 	uint64_t rip, int inst_length, struct vie *vie)
 {
-	int n, error, prot;
-	uint64_t gpa, off;
-	void *hpa, *cookie;
-
-	/*
-	 * XXX cache previously fetched instructions using 'rip' as the tag
-	 */
+	struct vm_copyinfo copyinfo[2];
+	int error, prot;
 
-	prot = VM_PROT_READ | VM_PROT_EXECUTE;
 	if (inst_length > VIE_INST_SIZE)
 		panic("vmm_fetch_instruction: invalid length %d", inst_length);
 
-	/* Copy the instruction into 'vie' */
-	while (vie->num_valid < inst_length) {
-		error = vmm_gla2gpa(vm, cpuid, paging, rip, prot, &gpa);
-		if (error)
-			return (error);
-
-		off = gpa & PAGE_MASK;
-		n = min(inst_length - vie->num_valid, PAGE_SIZE - off);
-
-		if ((hpa = vm_gpa_hold(vm, gpa, n, prot, &cookie)) == NULL)
-			break;
-
-		bcopy(hpa, &vie->inst[vie->num_valid], n);
-
-		vm_gpa_release(cookie);
-
-		rip += n;
-		vie->num_valid += n;
+	prot = PROT_READ | PROT_EXEC;
+	error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot,
+	    copyinfo, nitems(copyinfo));
+	if (error == 0) {
+		vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length);
+		vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+		vie->num_valid = inst_length;
 	}
-
-	if (vie->num_valid == inst_length)
-		return (0);
-	else
-		return (-1);
+	return (error);
 }
 
 static int
@@ -1007,24 +1293,65 @@ vie_advance(struct vie *vie)
 }
 
 static int
-decode_rex(struct vie *vie)
+decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
 {
 	uint8_t x;
 
-	if (vie_peek(vie, &x))
-		return (-1);
+	while (1) {
+		if (vie_peek(vie, &x))
+			return (-1);
 
-	if (x >= 0x40 && x <= 0x4F) {
-		vie->rex_present = 1;
+		if (x == 0x66)
+			vie->opsize_override = 1;
+		else if (x == 0x67)
+			vie->addrsize_override = 1;
+		else
+			break;
+
+		vie_advance(vie);
+	}
 
+	/*
+	 * From section 2.2.1, "REX Prefixes", Intel SDM Vol 2:
+	 * - Only one REX prefix is allowed per instruction.
+	 * - The REX prefix must immediately precede the opcode byte or the
+	 *   escape opcode byte.
+	 * - If an instruction has a mandatory prefix (0x66, 0xF2 or 0xF3)
+	 *   the mandatory prefix must come before the REX prefix.
+	 */
+	if (cpu_mode == CPU_MODE_64BIT && x >= 0x40 && x <= 0x4F) {
+		vie->rex_present = 1;
 		vie->rex_w = x & 0x8 ? 1 : 0;
 		vie->rex_r = x & 0x4 ? 1 : 0;
 		vie->rex_x = x & 0x2 ? 1 : 0;
 		vie->rex_b = x & 0x1 ? 1 : 0;
-
 		vie_advance(vie);
 	}
 
+	/*
+	 * Section "Operand-Size And Address-Size Attributes", Intel SDM, Vol 1
+	 */
+	if (cpu_mode == CPU_MODE_64BIT) {
+		/*
+		 * Default address size is 64-bits and default operand size
+		 * is 32-bits.
+		 */
+		vie->addrsize = vie->addrsize_override ? 4 : 8;
+		if (vie->rex_w)
+			vie->opsize = 8;
+		else if (vie->opsize_override)
+			vie->opsize = 2;
+		else
+			vie->opsize = 4;
+	} else if (cs_d) {
+		/* Default address and operand sizes are 32-bits */
+		vie->addrsize = vie->addrsize_override ? 2 : 4;
+		vie->opsize = vie->opsize_override ? 2 : 4;
+	} else {
+		/* Default address and operand sizes are 16-bits */
+		vie->addrsize = vie->addrsize_override ? 4 : 2;
+		vie->opsize = vie->opsize_override ? 4 : 2;
+	}
 	return (0);
 }
 
@@ -1071,6 +1398,12 @@ decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode)
 {
 	uint8_t x;
 
+	if (cpu_mode == CPU_MODE_REAL)
+		return (-1);
+
+	if (vie->op.op_flags & VIE_OP_F_NO_MODRM)
+		return (0);
+
 	if (vie_peek(vie, &x))
 		return (-1);
 
@@ -1249,20 +1582,32 @@ decode_immediate(struct vie *vie)
 	union {
 		char	buf[4];
 		int8_t	signed8;
+		int16_t	signed16;
 		int32_t	signed32;
 	} u;
 
 	/* Figure out immediate operand size (if any) */
-	if (vie->op.op_flags & VIE_OP_F_IMM)
-		vie->imm_bytes = 4;
-	else if (vie->op.op_flags & VIE_OP_F_IMM8)
+	if (vie->op.op_flags & VIE_OP_F_IMM) {
+		/*
+		 * Section 2.2.1.5 "Immediates", Intel SDM:
+		 * In 64-bit mode the typical size of immediate operands
+		 * remains 32-bits. When the operand size if 64-bits, the
+		 * processor sign-extends all immediates to 64-bits prior
+		 * to their use.
+		 */
+		if (vie->opsize == 4 || vie->opsize == 8)
+			vie->imm_bytes = 4;
+		else
+			vie->imm_bytes = 2;
+	} else if (vie->op.op_flags & VIE_OP_F_IMM8) {
 		vie->imm_bytes = 1;
+	}
 
 	if ((n = vie->imm_bytes) == 0)
		return (0);
 
-	if (n != 1 && n != 4)
-		panic("decode_immediate: invalid imm_bytes %d", n);
+	KASSERT(n == 1 || n == 2 || n == 4,
+	    ("%s: invalid number of immediate bytes: %d", __func__, n));
 
 	for (i = 0; i < n; i++) {
 		if (vie_peek(vie, &x))
@@ -1271,12 +1616,47 @@ decode_immediate(struct vie *vie)
 		u.buf[i] = x;
 		vie_advance(vie);
 	}
-
+
+	/* sign-extend the immediate value before use */
 	if (n == 1)
-		vie->immediate = u.signed8;		/* sign-extended */
+		vie->immediate = u.signed8;
+	else if (n == 2)
+		vie->immediate = u.signed16;
 	else
-		vie->immediate = u.signed32;		/* sign-extended */
+		vie->immediate = u.signed32;
+
+	return (0);
+}
+
+static int
+decode_moffset(struct vie *vie)
+{
+	int i, n;
+	uint8_t x;
+	union {
+		char	buf[8];
+		uint64_t u64;
+	} u;
+
+	if ((vie->op.op_flags & VIE_OP_F_MOFFSET) == 0)
+		return (0);
+
+	/*
+	 * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM:
+	 * The memory offset size follows the address-size of the instruction.
+	 */
+	n = vie->addrsize;
+	KASSERT(n == 2 || n == 4 || n == 8, ("invalid moffset bytes: %d", n));
+
+	u.u64 = 0;
+	for (i = 0; i < n; i++) {
+		if (vie_peek(vie, &x))
+			return (-1);
+
+		u.buf[i] = x;
+		vie_advance(vie);
+	}
+	vie->displacement = u.u64;
 	return (0);
 }
 
@@ -1301,7 +1681,7 @@
 static int
 verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
 {
 	int error;
-	uint64_t base, idx;
+	uint64_t base, idx, gla2;
 
 	/* Skip 'gla' verification */
 	if (gla == VIE_INVALID_GLA)
@@ -1334,11 +1714,14 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
 		}
 	}
 
-	if (base + vie->scale * idx + vie->displacement != gla) {
+	/* XXX assuming that the base address of the segment is 0 */
+	gla2 = base + vie->scale * idx + vie->displacement;
+	gla2 &= size2mask[vie->addrsize];
+	if (gla != gla2) {
 		printf("verify_gla mismatch: "
 		    "base(0x%0lx), scale(%d), index(0x%0lx), "
-		    "disp(0x%0lx), gla(0x%0lx)\n",
-		    base, vie->scale, idx, vie->displacement, gla);
+		    "disp(0x%0lx), gla(0x%0lx), gla2(0x%0lx)\n",
+		    base, vie->scale, idx, vie->displacement, gla, gla2);
 		return (-1);
 	}
 
@@ -1347,13 +1730,11 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
 
 int
 vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
-		       enum vm_cpu_mode cpu_mode, struct vie *vie)
+		       enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie)
 {
 
-	if (cpu_mode == CPU_MODE_64BIT) {
-		if (decode_rex(vie))
-			return (-1);
-	}
+	if (decode_prefixes(vie, cpu_mode, cs_d))
+		return (-1);
 
 	if (decode_opcode(vie))
 		return (-1);
@@ -1366,10 +1747,13 @@ vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
 	if (decode_displacement(vie))
 		return (-1);
-
+
 	if (decode_immediate(vie))
 		return (-1);
 
+	if (decode_moffset(vie))
+		return (-1);
+
 	if (verify_inst_length(vie))
 		return (-1);
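A standalone sketch (not part of the patch) of the getcc() technique introduced for CMP emulation: the host performs the subtraction itself and captures the resulting RFLAGS with pushfq/popq, which emulate_cmp() then masks down to the status bits before merging them into the guest's RFLAGS. The 0x8d5 literal stands in for PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V from <machine/psl.h>, and the example assumes an x86-64 host with GCC/Clang inline assembly.

	/*
	 * Illustrative only: mirrors the getcc32() helper added by this
	 * change, built as a userland program instead of kernel code.
	 */
	#include <stdio.h>
	#include <stdint.h>

	#define	RFLAGS_STATUS_BITS	0x8d5UL	/* CF | PF | AF | ZF | SF | OF */

	static unsigned long
	getcc32(uint32_t x, uint32_t y)
	{
		unsigned long rflags;

		/* Compute (x - y) only for its side effect on the flags. */
		__asm__ __volatile__("sub %1,%2; pushfq; popq %0" :
		    "=r" (rflags) : "m" (y), "r" (x));
		return (rflags);
	}

	int
	main(void)
	{
		/* Equal operands: an emulated CMP must report ZF=1, CF=0. */
		unsigned long flags = getcc32(0x10, 0x10) & RFLAGS_STATUS_BITS;

		printf("status flags %#lx, ZF %s\n", flags,
		    (flags & 0x40) ? "set" : "clear");
		return (0);
	}

Doing the arithmetic natively and harvesting RFLAGS avoids reimplementing the carry/parity/overflow rules in C, which is why emulate_cmp() only has to mask the result and merge it into the guest's RFLAGS.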