author     neel <neel@FreeBSD.org>  2015-06-27 22:48:22 +0000
committer  neel <neel@FreeBSD.org>  2015-06-27 22:48:22 +0000
commit     115742fae3f7a7c52d6d5f4894f37e68dff4fd5c (patch)
tree       cb845c4cb2d3a3b67b3e1134742c3c5b250ae954 /sys/amd64/vmm/vmm_instruction_emul.c
parent     02efaba1d135756ed65855bdc99e7d83f46cc4a2 (diff)
MFC r276428:
Replace bhyve's minimal RTC emulation with a fully featured one in vmm.ko.
MFC r276432:
Initialize all fields of 'struct vm_exception exception' before passing it
to vm_inject_exception().
MFC r276763:
Clear blocking due to STI or MOV SS in the hypervisor when an instruction is
emulated or when the vcpu incurs an exception.
MFC r277149:
Clean up usage of 'struct vm_exception' to only communicate information
from userspace to vmm.ko when injecting an exception.
MFC r277168:
Fix typo (missing comma).
MFC r277309:
Make the error message explicit instead of just printing the usage if the
virtual machine name is not specified.
MFC r277310:
Simplify instruction restart logic in bhyve.
MFC r277359:
Fix a bug in libvmmapi 'vm_copy_setup()' where it would return success even
if the 'gpa' was in the guest MMIO region.
MFC r277360:
MOVS instruction emulation.
MFC r277626:
Add macro to identify AVIC capability (advanced virtual interrupt controller)
in AMD processors.
MFC r279220:
Don't close a block context if it couldn't be opened, avoiding a null dereference.
MFC r279225:
Add "-u" option to bhyve(8) to indicate that the RTC should maintain UTC time.
MFC r279227:
Emulate MSR 0xC0011024 when running on AMD processors.
MFC r279228:
Always emulate MSR_PAT on Intel processors and don't rely on PAT save/restore
capability of VT-x. This lets bhyve run nested in older VMware versions that
don't support the PAT save/restore capability.
MFC r279540:
Fix warnings/errors when building vmm.ko with gcc.
Diffstat (limited to 'sys/amd64/vmm/vmm_instruction_emul.c')
-rw-r--r--  sys/amd64/vmm/vmm_instruction_emul.c  271
1 file changed, 267 insertions, 4 deletions
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index d1d7173..3db890e 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -70,6 +70,7 @@ enum {
     VIE_OP_TYPE_PUSH,
     VIE_OP_TYPE_CMP,
     VIE_OP_TYPE_POP,
+    VIE_OP_TYPE_MOVS,
     VIE_OP_TYPE_LAST
 };
 
@@ -78,6 +79,7 @@ enum {
 #define VIE_OP_F_IMM8                 (1 << 1)  /* 8-bit immediate operand */
 #define VIE_OP_F_MOFFSET              (1 << 2)  /* 16/32/64-bit immediate moffset */
 #define VIE_OP_F_NO_MODRM             (1 << 3)
+#define VIE_OP_F_NO_GLA_VERIFICATION  (1 << 4)
 
 static const struct vie_op two_byte_opcodes[256] = {
     [0xB6] = {
@@ -133,6 +135,16 @@ static const struct vie_op one_byte_opcodes[256] = {
         .op_type = VIE_OP_TYPE_MOV,
         .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
     },
+    [0xA4] = {
+        .op_byte = 0xA4,
+        .op_type = VIE_OP_TYPE_MOVS,
+        .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+    },
+    [0xA5] = {
+        .op_byte = 0xA5,
+        .op_type = VIE_OP_TYPE_MOVS,
+        .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+    },
     [0xC6] = {
         /* XXX Group 11 extended opcode - not just MOV */
         .op_byte = 0xC6,
@@ -559,6 +571,217 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
     return (error);
 }
 
+/*
+ * Helper function to calculate and validate a linear address.
+ *
+ * Returns 0 on success and 1 if an exception was injected into the guest.
+ */
+static int
+get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging,
+    int opsize, int addrsize, int prot, enum vm_reg_name seg,
+    enum vm_reg_name gpr, uint64_t *gla)
+{
+    struct seg_desc desc;
+    uint64_t cr0, val, rflags;
+    int error;
+
+    error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
+    KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
+
+    error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+    KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+    error = vm_get_seg_desc(vm, vcpuid, seg, &desc);
+    KASSERT(error == 0, ("%s: error %d getting segment descriptor %d",
+        __func__, error, seg));
+
+    error = vie_read_register(vm, vcpuid, gpr, &val);
+    KASSERT(error == 0, ("%s: error %d getting register %d", __func__,
+        error, gpr));
+
+    if (vie_calculate_gla(paging->cpu_mode, seg, &desc, val, opsize,
+        addrsize, prot, gla)) {
+        if (seg == VM_REG_GUEST_SS)
+            vm_inject_ss(vm, vcpuid, 0);
+        else
+            vm_inject_gp(vm, vcpuid);
+        return (1);
+    }
+
+    if (vie_canonical_check(paging->cpu_mode, *gla)) {
+        if (seg == VM_REG_GUEST_SS)
+            vm_inject_ss(vm, vcpuid, 0);
+        else
+            vm_inject_gp(vm, vcpuid);
+        return (1);
+    }
+
+    if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) {
+        vm_inject_ac(vm, vcpuid, 0);
+        return (1);
+    }
+
+    return (0);
+}
+
+static int
+emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *arg)
+{
+#ifdef _KERNEL
+    struct vm_copyinfo copyinfo[2];
+#else
+    struct iovec copyinfo[2];
+#endif
+    uint64_t dstaddr, srcaddr, val;
+    uint64_t rcx, rdi, rsi, rflags;
+    int error, opsize, seg, repeat;
+
+    opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize;
+    val = 0;
+    error = 0;
+
+    /*
+     * XXX although the MOVS instruction is only supposed to be used with
+     * the "rep" prefix some guests like FreeBSD will use "repnz" instead.
+     *
+     * Empirically the "repnz" prefix has identical behavior to "rep"
+     * and the zero flag does not make a difference.
+     */
+    repeat = vie->repz_present | vie->repnz_present;
+
+    if (repeat) {
+        error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
+        KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
+
+        /*
+         * The count register is %rcx, %ecx or %cx depending on the
+         * address size of the instruction.
+         */
+        if ((rcx & vie_size2mask(vie->addrsize)) == 0)
+            return (0);
+    }
+
+    /*
+     *    Source        Destination    Comments
+     *    --------------------------------------------
+     * (1) memory       memory         n/a
+     * (2) memory       mmio           emulated
+     * (3) mmio         memory         emulated
+     * (4) mmio         mmio           not emulated
+     *
+     * At this point we don't have sufficient information to distinguish
+     * between (2), (3) and (4). We use 'vm_copy_setup()' to tease this
+     * out because it will succeed only when operating on regular memory.
+     *
+     * XXX the emulation doesn't properly handle the case where 'gpa'
+     * is straddling the boundary between the normal memory and MMIO.
+     */
+
+    seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS;
+    error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
+        PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr);
+    if (error)
+        goto done;
+
+    error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ,
+        copyinfo, nitems(copyinfo));
+    if (error == 0) {
+        /*
+         * case (2): read from system memory and write to mmio.
+         */
+        vm_copyin(vm, vcpuid, copyinfo, &val, opsize);
+        vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+        error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
+        goto done;
+    } else if (error > 0) {
+        /*
+         * Resume guest execution to handle fault.
+         */
+        goto done;
+    } else {
+        /*
+         * 'vm_copy_setup()' is expected to fail for cases (3) and (4)
+         * if 'srcaddr' is in the mmio space.
+         */
+    }
+
+    error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
+        PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr);
+    if (error)
+        goto done;
+
+    error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize,
+        PROT_WRITE, copyinfo, nitems(copyinfo));
+    if (error == 0) {
+        /*
+         * case (3): read from MMIO and write to system memory.
+         *
+         * A MMIO read can have side-effects so we commit to it
+         * only after vm_copy_setup() is successful. If a page-fault
+         * needs to be injected into the guest then it will happen
+         * before the MMIO read is attempted.
+         */
+        error = memread(vm, vcpuid, gpa, &val, opsize, arg);
+        if (error)
+            goto done;
+
+        vm_copyout(vm, vcpuid, &val, copyinfo, opsize);
+        vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+    } else if (error > 0) {
+        /*
+         * Resume guest execution to handle fault.
+         */
+        goto done;
+    } else {
+        goto done;
+    }
+
+    error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi);
+    KASSERT(error == 0, ("%s: error %d getting rsi", __func__, error));
+
+    error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
+    KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
+
+    error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+    KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+    if (rflags & PSL_D) {
+        rsi -= opsize;
+        rdi -= opsize;
+    } else {
+        rsi += opsize;
+        rdi += opsize;
+    }
+
+    error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSI, rsi,
+        vie->addrsize);
+    KASSERT(error == 0, ("%s: error %d updating rsi", __func__, error));
+
+    error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi,
+        vie->addrsize);
+    KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
+
+    if (repeat) {
+        rcx = rcx - 1;
+        error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
+            rcx, vie->addrsize);
+        KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
+
+        /*
+         * Repeat the instruction if the count register is not zero.
+         */
+        if ((rcx & vie_size2mask(vie->addrsize)) != 0)
+            vm_restart_instruction(vm, vcpuid);
+    }
+done:
+    if (error < 0)
+        return (EFAULT);
+    else
+        return (0);
+}
+
 static int
 emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
     mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
@@ -926,9 +1149,7 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
         error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg);
         rsp += size;
     }
-#ifdef _KERNEL
     vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
-#endif
 
     if (error == 0) {
         error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp,
@@ -1012,6 +1233,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
         error = emulate_movx(vm, vcpuid, gpa, vie,
             memread, memwrite, memarg);
         break;
+    case VIE_OP_TYPE_MOVS:
+        error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread,
+            memwrite, memarg);
+        break;
     case VIE_OP_TYPE_AND:
         error = emulate_and(vm, vcpuid, gpa, vie, memread,
             memwrite, memarg);
@@ -1193,6 +1418,7 @@ vie_init(struct vie *vie, const char *inst_bytes, int inst_length)
 
     vie->base_register = VM_REG_LAST;
     vie->index_register = VM_REG_LAST;
+    vie->segment_register = VM_REG_LAST;
 
     if (inst_length) {
         bcopy(inst_bytes, vie->inst, inst_length);
@@ -1458,6 +1684,35 @@ vie_advance(struct vie *vie)
     vie->num_processed++;
 }
 
+static bool
+segment_override(uint8_t x, int *seg)
+{
+
+    switch (x) {
+    case 0x2E:
+        *seg = VM_REG_GUEST_CS;
+        break;
+    case 0x36:
+        *seg = VM_REG_GUEST_SS;
+        break;
+    case 0x3E:
+        *seg = VM_REG_GUEST_DS;
+        break;
+    case 0x26:
+        *seg = VM_REG_GUEST_ES;
+        break;
+    case 0x64:
+        *seg = VM_REG_GUEST_FS;
+        break;
+    case 0x65:
+        *seg = VM_REG_GUEST_GS;
+        break;
+    default:
+        return (false);
+    }
+    return (true);
+}
+
 static int
 decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
 {
@@ -1471,6 +1726,12 @@ decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
             vie->opsize_override = 1;
         else if (x == 0x67)
             vie->addrsize_override = 1;
+        else if (x == 0xF3)
+            vie->repz_present = 1;
+        else if (x == 0xF2)
+            vie->repnz_present = 1;
+        else if (segment_override(x, &vie->segment_register))
+            vie->segment_override = 1;
         else
             break;
 
@@ -1923,8 +2184,10 @@ vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
     if (verify_inst_length(vie))
         return (-1);
 
-    if (verify_gla(vm, cpuid, gla, vie))
-        return (-1);
+    if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) {
+        if (verify_gla(vm, cpuid, gla, vie))
+            return (-1);
+    }
 
     vie->decoded = 1;    /* success */
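
Editor's note: for context on why MOVS emulation is worth the trouble, consider a guest that
copies into a memory-mapped device region (for example a framebuffer) with memcpy(). The copy
may be lowered to "rep movs", which faults into the hypervisor on the MMIO access and lands in
emulate_movs() above; that is case (2) in the source/destination table in the code. The fragment
below is a hypothetical guest-side illustration only: FB_BASE and its address are invented for
the example and do not come from this commit.

#include <stdint.h>
#include <string.h>

/* Hypothetical guest code. FB_BASE stands in for an MMIO-backed
 * framebuffer mapping; the address is illustrative only. */
#define FB_BASE ((uint8_t *)0xC0000000UL)

void
blit_row(const uint8_t *src, size_t len)
{
    /* The compiler or libc may lower this call to "rep movsb" or
     * "rep movsq"; each access to FB_BASE then traps and is handled
     * by the MOVS emulation in vmm.ko. */
    memcpy(FB_BASE, src, len);
}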
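Editor's note on the repeat handling in emulate_movs(): the count register is %cx, %ecx or %rcx
depending on the instruction's address size, which is why the emulation masks %rcx with
vie_size2mask() both when testing for zero on entry and when deciding whether to restart the
instruction. The stand-alone sketch below (user-space C; size2mask() is a stand-in for the
kernel's vie_size2mask(), whose values are assumed here) shows why the mask matters: with a
32-bit address size, stale bits in the upper half of %rcx must not keep the instruction looping.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for vmm.ko's vie_size2mask(): mask for a 2/4/8-byte address size. */
static uint64_t
size2mask(int addrsize)
{
    switch (addrsize) {
    case 2:
        return (0xffff);                /* count in %cx */
    case 4:
        return (0xffffffff);            /* count in %ecx */
    default:
        return (~(uint64_t)0);          /* count in %rcx */
    }
}

int
main(void)
{
    uint64_t rcx = 0xdeadbeef00000001ULL;   /* %ecx == 1, upper half stale */
    int addrsize = 4;                       /* 32-bit address size */

    if ((rcx & size2mask(addrsize)) == 0)
        return (0);                         /* nothing to copy */

    rcx -= 1;                               /* one element copied */
    if ((rcx & size2mask(addrsize)) != 0)
        printf("restart the instruction\n");
    else
        printf("done, even though %%rcx is still 0x%llx\n",
            (unsigned long long)rcx);
    return (0);
}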
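Similarly, the PSL_D branch in emulate_movs() implements the architectural direction flag:
string instructions step %rsi and %rdi forward when DF is clear (after "cld") and backward when
it is set (after "std"). A minimal user-space model of that update step follows; PSL_D is copied
from its <machine/psl.h> definition (bit 10 of %rflags), and advance() is a name local to this
sketch, not a vmm.ko function.

#include <stdint.h>
#include <stdio.h>

#define PSL_D   0x00000400      /* direction flag, bit 10 of %rflags */

/* Model of the per-iteration %rsi/%rdi adjustment in emulate_movs(). */
static void
advance(uint64_t *rsi, uint64_t *rdi, uint64_t rflags, int opsize)
{
    if (rflags & PSL_D) {       /* DF set ("std"): move downward */
        *rsi -= opsize;
        *rdi -= opsize;
    } else {                    /* DF clear ("cld"): move upward */
        *rsi += opsize;
        *rdi += opsize;
    }
}

int
main(void)
{
    uint64_t rsi = 0x1000, rdi = 0x2000;

    advance(&rsi, &rdi, 0, 4);          /* movsd with DF clear */
    printf("up:   rsi=0x%llx rdi=0x%llx\n",
        (unsigned long long)rsi, (unsigned long long)rdi);

    advance(&rsi, &rdi, PSL_D, 4);      /* movsd with DF set */
    printf("down: rsi=0x%llx rdi=0x%llx\n",
        (unsigned long long)rsi, (unsigned long long)rdi);
    return (0);
}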