diff options
author | jhb <jhb@FreeBSD.org> | 2014-02-05 04:39:03 +0000 |
---|---|---|
committer | jhb <jhb@FreeBSD.org> | 2014-02-05 04:39:03 +0000 |
commit | f4e46bef9870d0ed15e9f02b57af7ccf63794d09 (patch) | |
tree | 9028dde86802e6e728a5c5a95359f88aeedcf6dc /sys/amd64 | |
parent | c9758b2e1cc84a86a515276115bc1bc41de75c54 (diff) | |
download | FreeBSD-src-f4e46bef9870d0ed15e9f02b57af7ccf63794d09.zip FreeBSD-src-f4e46bef9870d0ed15e9f02b57af7ccf63794d09.tar.gz |
Add support for FreeBSD/i386 guests under bhyve.
- Similar to the hack for bootinfo32.c in userboot, define
_MACHINE_ELF_WANT_32BIT in the load_elf32 file handlers in userboot.
This allows userboot to load 32-bit kernels and modules.
- Copy the SMAP generation code out of bootinfo64.c and into its own
file so it can be shared with bootinfo32.c to pass an SMAP to the i386
kernel.
- Use uint32_t instead of u_long when aligning module metadata in
bootinfo32.c in userboot, as otherwise the metadata used 64-bit
alignment which corrupted the layout.
- Populate the basemem and extmem members of the bootinfo struct passed
to 32-bit kernels.
- Fix the 32-bit stack in userboot to start at the top of the stack
instead of the bottom so that there is room to grow before the
kernel switches to its own stack.
- Push a fake return address onto the 32-bit stack in addition to the
arguments normally passed to exec() in the loader. This return
address is needed to convince recover_bootinfo() in the 32-bit
locore code that it is being invoked from a "new" boot block.
- Add a routine to libvmmapi to setup a 32-bit flat mode register state
including a GDT and TSS that is able to start the i386 kernel and
update bhyveload to use it when booting an i386 kernel.
- Use the guest register state to determine the CPU's current instruction
mode (32-bit vs 64-bit) and paging mode (flat, 32-bit, PAE, or long
mode) in the instruction emulation code. Update the gla2gpa() routine
used when fetching instructions to handle flat mode, 32-bit paging, and
PAE paging in addition to long mode paging. Don't look for a REX
prefix when the CPU is in 32-bit mode, and use the detected mode to
enable the existing 32-bit mode code when decoding the mod r/m byte.
Reviewed by: grehan, neel
MFC after: 1 month
Diffstat (limited to 'sys/amd64')
-rw-r--r-- | sys/amd64/include/vmm.h | 2 | ||||
-rw-r--r-- | sys/amd64/include/vmm_instruction_emul.h | 18 | ||||
-rw-r--r-- | sys/amd64/vmm/intel/vmx.c | 28 | ||||
-rw-r--r-- | sys/amd64/vmm/vmm.c | 9 | ||||
-rw-r--r-- | sys/amd64/vmm/vmm_instruction_emul.c | 102 |
5 files changed, 129 insertions, 30 deletions
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index cc7c7ad..8b6933a 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -323,6 +323,8 @@ struct vm_exit { uint64_t gpa; uint64_t gla; uint64_t cr3; + enum vie_cpu_mode cpu_mode; + enum vie_paging_mode paging_mode; struct vie vie; } inst_emul; /* diff --git a/sys/amd64/include/vmm_instruction_emul.h b/sys/amd64/include/vmm_instruction_emul.h index a7480e7..0901aa2 100644 --- a/sys/amd64/include/vmm_instruction_emul.h +++ b/sys/amd64/include/vmm_instruction_emul.h @@ -29,6 +29,18 @@ #ifndef _VMM_INSTRUCTION_EMUL_H_ #define _VMM_INSTRUCTION_EMUL_H_ +enum vie_cpu_mode { + CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */ + CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */ +}; + +enum vie_paging_mode { + PAGING_MODE_FLAT, + PAGING_MODE_32, + PAGING_MODE_PAE, + PAGING_MODE_64, +}; + /* * The data structures 'vie' and 'vie_op' are meant to be opaque to the * consumers of instruction decoding. The only reason why their contents @@ -107,7 +119,7 @@ int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie, */ int vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length, uint64_t cr3, - struct vie *vie); + enum vie_paging_mode paging_mode, struct vie *vie); void vie_init(struct vie *vie); @@ -123,8 +135,8 @@ void vie_init(struct vie *vie); * in VIE_INVALID_GLA instead. */ #define VIE_INVALID_GLA (1UL << 63) /* a non-canonical address */ -int vmm_decode_instruction(struct vm *vm, int cpuid, - uint64_t gla, struct vie *vie); +int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, + enum vie_cpu_mode cpu_mode, struct vie *vie); #endif /* _KERNEL */ #endif /* _VMM_INSTRUCTION_EMUL_H_ */ diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index 32ba998..4d2f563 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -1338,6 +1338,30 @@ vmx_emulate_cr_access(struct vmx *vmx, int vcpu, uint64_t exitqual) return (HANDLED); } +static enum vie_cpu_mode +vmx_cpu_mode(void) +{ + + if (vmcs_read(VMCS_GUEST_IA32_EFER) & EFER_LMA) + return (CPU_MODE_64BIT); + else + return (CPU_MODE_COMPATIBILITY); +} + +static enum vie_paging_mode +vmx_paging_mode(void) +{ + + if (!(vmcs_read(VMCS_GUEST_CR0) & CR0_PG)) + return (PAGING_MODE_FLAT); + if (!(vmcs_read(VMCS_GUEST_CR4) & CR4_PAE)) + return (PAGING_MODE_32); + if (vmcs_read(VMCS_GUEST_IA32_EFER) & EFER_LME) + return (PAGING_MODE_64); + else + return (PAGING_MODE_PAE); +} + static int ept_fault_type(uint64_t ept_qual) { @@ -1497,6 +1521,8 @@ vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) vmexit->u.inst_emul.gpa = DEFAULT_APIC_BASE + offset; vmexit->u.inst_emul.gla = VIE_INVALID_GLA; vmexit->u.inst_emul.cr3 = vmcs_guest_cr3(); + vmexit->u.inst_emul.cpu_mode = vmx_cpu_mode(); + vmexit->u.inst_emul.paging_mode = vmx_paging_mode(); } /* @@ -1724,6 +1750,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) vmexit->u.inst_emul.gpa = gpa; vmexit->u.inst_emul.gla = vmcs_gla(); vmexit->u.inst_emul.cr3 = vmcs_guest_cr3(); + vmexit->u.inst_emul.cpu_mode = vmx_cpu_mode(); + vmexit->u.inst_emul.paging_mode = vmx_paging_mode(); } /* * If Virtual NMIs control is 1 and the VM-exit is due to an diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 4759881..30f8dd3 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -1056,6 +1056,8 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) struct vm_exit *vme; int error, inst_length; uint64_t rip, gla, gpa, cr3; + enum vie_cpu_mode cpu_mode; + enum vie_paging_mode paging_mode; mem_region_read_t mread; mem_region_write_t mwrite; @@ -1068,15 +1070,18 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) gla = vme->u.inst_emul.gla; gpa = vme->u.inst_emul.gpa; cr3 = vme->u.inst_emul.cr3; + cpu_mode = vme->u.inst_emul.cpu_mode; + paging_mode = vme->u.inst_emul.paging_mode; vie = &vme->u.inst_emul.vie; vie_init(vie); /* Fetch, decode and emulate the faulting instruction */ - if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3, vie) != 0) + if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3, + paging_mode, vie) != 0) return (EFAULT); - if (vmm_decode_instruction(vm, vcpuid, gla, vie) != 0) + if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, vie) != 0) return (EFAULT); /* return to userland unless this is an in-kernel emulated device */ diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c index b10efbc..2085022 100644 --- a/sys/amd64/vmm/vmm_instruction_emul.c +++ b/sys/amd64/vmm/vmm_instruction_emul.c @@ -49,11 +49,6 @@ __FBSDID("$FreeBSD$"); #include <vmmapi.h> #endif /* _KERNEL */ -enum cpu_mode { - CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */ - CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */ -}; - /* struct vie_op.op_type */ enum { VIE_OP_TYPE_NONE = 0, @@ -546,16 +541,76 @@ vie_init(struct vie *vie) static int gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys, - uint64_t *gpa, uint64_t *gpaend) + uint64_t *gpa, enum vie_paging_mode paging_mode) { int nlevels, ptpshift, ptpindex; uint64_t *ptpbase, pte, pgsize; + uint32_t *ptpbase32, pte32; void *cookie; - /* - * XXX assumes 64-bit guest with 4 page walk levels - */ - nlevels = 4; + if (paging_mode == PAGING_MODE_FLAT) { + *gpa = gla; + return (0); + } + + if (paging_mode == PAGING_MODE_32) { + nlevels = 2; + while (--nlevels >= 0) { + /* Zero out the lower 12 bits. */ + ptpphys &= ~0xfff; + + ptpbase32 = vm_gpa_hold(vm, ptpphys, PAGE_SIZE, + VM_PROT_READ, &cookie); + + if (ptpbase32 == NULL) + goto error; + + ptpshift = PAGE_SHIFT + nlevels * 10; + ptpindex = (gla >> ptpshift) & 0x3FF; + pgsize = 1UL << ptpshift; + + pte32 = ptpbase32[ptpindex]; + + vm_gpa_release(cookie); + + if ((pte32 & PG_V) == 0) + goto error; + + if (pte32 & PG_PS) + break; + + ptpphys = pte32; + } + + /* Zero out the lower 'ptpshift' bits */ + pte32 >>= ptpshift; pte32 <<= ptpshift; + *gpa = pte32 | (gla & (pgsize - 1)); + return (0); + } + + if (paging_mode == PAGING_MODE_PAE) { + /* Zero out the lower 5 bits and the upper 12 bits */ + ptpphys >>= 5; ptpphys <<= 17; ptpphys >>= 12; + + ptpbase = vm_gpa_hold(vm, ptpphys, sizeof(*ptpbase) * 4, + VM_PROT_READ, &cookie); + if (ptpbase == NULL) + goto error; + + ptpindex = (gla >> 30) & 0x3; + + pte = ptpbase[ptpindex]; + + vm_gpa_release(cookie); + + if ((pte & PG_V) == 0) + goto error; + + ptpphys = pte; + + nlevels = 2; + } else + nlevels = 4; while (--nlevels >= 0) { /* Zero out the lower 12 bits and the upper 12 bits */ ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12; @@ -589,7 +644,6 @@ gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys, /* Zero out the lower 'ptpshift' bits and the upper 12 bits */ pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12; *gpa = pte | (gla & (pgsize - 1)); - *gpaend = pte + pgsize; return (0); error: @@ -598,10 +652,11 @@ error: int vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length, - uint64_t cr3, struct vie *vie) + uint64_t cr3, enum vie_paging_mode paging_mode, + struct vie *vie) { int n, err, prot; - uint64_t gpa, gpaend, off; + uint64_t gpa, off; void *hpa, *cookie; /* @@ -614,7 +669,7 @@ vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length, /* Copy the instruction into 'vie' */ while (vie->num_valid < inst_length) { - err = gla2gpa(vm, rip, cr3, &gpa, &gpaend); + err = gla2gpa(vm, rip, cr3, &gpa, paging_mode); if (err) break; @@ -717,15 +772,9 @@ decode_opcode(struct vie *vie) } static int -decode_modrm(struct vie *vie) +decode_modrm(struct vie *vie, enum vie_cpu_mode cpu_mode) { uint8_t x; - enum cpu_mode cpu_mode; - - /* - * XXX assuming that guest is in IA-32E 64-bit mode - */ - cpu_mode = CPU_MODE_64BIT; if (vie_peek(vie, &x)) return (-1); @@ -1002,16 +1051,19 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie) } int -vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie) +vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, + enum vie_cpu_mode cpu_mode, struct vie *vie) { - if (decode_rex(vie)) - return (-1); + if (cpu_mode == CPU_MODE_64BIT) { + if (decode_rex(vie)) + return (-1); + } if (decode_opcode(vie)) return (-1); - if (decode_modrm(vie)) + if (decode_modrm(vie, cpu_mode)) return (-1); if (decode_sib(vie)) |