summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
author: neel <neel@FreeBSD.org> 2014-05-24 20:26:57 +0000
committer: neel <neel@FreeBSD.org> 2014-05-24 20:26:57 +0000
commit: 6a6e13c407a246faf2265a0ed79ab28fd9419bb6 (patch)
tree: 507882e99e7a06753f2707b9ed35d4f6aeea4020 /sys
parent: 52a4f11861c5ba735a7bb75bd093905e734e16bd (diff)
download: FreeBSD-src-6a6e13c407a246faf2265a0ed79ab28fd9419bb6.zip
download: FreeBSD-src-6a6e13c407a246faf2265a0ed79ab28fd9419bb6.tar.gz
Consolidate all the information needed by the guest page table walker into
'struct vm_guest_paging'.

Check for canonical addressing in vmm_gla2gpa() and inject a general protection fault (#GP) into the guest if a violation is detected.

If the page table walk is restarted in vmm_gla2gpa(), then reset 'ptpphys' to point to the root of the page tables.
Diffstat (limited to 'sys')
-rw-r--r-- sys/amd64/include/vmm.h 77
-rw-r--r-- sys/amd64/include/vmm_instruction_emul.h 78
-rw-r--r-- sys/amd64/vmm/intel/vmx.c 24
-rw-r--r-- sys/amd64/vmm/vmm.c 22
-rw-r--r-- sys/amd64/vmm/vmm_instruction_emul.c 68
-rw-r--r-- sys/amd64/vmm/vmm_ioport.c 12
6 files changed, 154 insertions, 127 deletions
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 407b5c1..021efaf 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -243,8 +243,6 @@ enum vm_reg_name vm_segment_name(int seg_encoding);
#endif /* KERNEL */
-#include <machine/vmm_instruction_emul.h>
-
#define VM_MAXCPU 16 /* maximum virtual cpus */
/*
@@ -324,6 +322,71 @@ struct seg_desc {
uint32_t access;
};
+enum vm_cpu_mode {
+ CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */
+ CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */
+};
+
+enum vm_paging_mode {
+ PAGING_MODE_FLAT, /* no translation: vmm_gla2gpa() returns gpa == gla */
+ PAGING_MODE_32, /* 2-level walk using 32-bit page table entries */
+ PAGING_MODE_PAE, /* walk starts from cr3 masked with 0xffffffe0 */
+ PAGING_MODE_64, /* presumably long mode paging -- TODO confirm */
+};
+
+struct vm_guest_paging { /* guest state consumed by the page table walker */
+ uint64_t cr3; /* root of the guest page tables */
+ int cpl; /* privilege level; 3 is treated as user mode by the walker */
+ enum vm_cpu_mode cpu_mode; /* decides whether the canonical check applies */
+ enum vm_paging_mode paging_mode; /* selects how vmm_gla2gpa() walks the tables */
+};
+
+/*
+ * The data structures 'vie' and 'vie_op' are meant to be opaque to the
+ * consumers of instruction decoding. The only reason why their contents
+ * need to be exposed is because they are part of the 'vm_exit' structure.
+ */
+struct vie_op {
+ uint8_t op_byte; /* actual opcode byte */
+ uint8_t op_type; /* type of operation (e.g. MOV) */
+ uint16_t op_flags;
+};
+
+#define VIE_INST_SIZE 15
+struct vie {
+ uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
+ uint8_t num_valid; /* size of the instruction */
+ uint8_t num_processed;
+
+ uint8_t rex_w:1, /* REX prefix */
+ rex_r:1,
+ rex_x:1,
+ rex_b:1,
+ rex_present:1;
+
+ uint8_t mod:2, /* ModRM byte */
+ reg:4,
+ rm:4;
+
+ uint8_t ss:2, /* SIB byte */
+ index:4,
+ base:4;
+
+ uint8_t disp_bytes;
+ uint8_t imm_bytes;
+
+ uint8_t scale;
+ int base_register; /* VM_REG_GUEST_xyz */
+ int index_register; /* VM_REG_GUEST_xyz */
+
+ int64_t displacement; /* optional addr displacement */
+ int64_t immediate; /* optional immediate operand */
+
+ uint8_t decoded; /* set to 1 if successfully decoded */
+
+ struct vie_op op; /* opcode description */
+};
+
enum vm_exitcode {
VM_EXITCODE_INOUT,
VM_EXITCODE_VMX,
@@ -355,14 +418,11 @@ struct vm_inout {
struct vm_inout_str {
struct vm_inout inout; /* must be the first element */
- enum vie_cpu_mode cpu_mode;
- enum vie_paging_mode paging_mode;
+ struct vm_guest_paging paging;
uint64_t rflags;
uint64_t cr0;
- uint64_t cr3;
uint64_t index;
uint64_t count; /* rep=1 (%rcx), rep=0 (1) */
- int cpl;
int addrsize;
enum vm_reg_name seg_name;
struct seg_desc seg_desc;
@@ -384,10 +444,7 @@ struct vm_exit {
struct {
uint64_t gpa;
uint64_t gla;
- uint64_t cr3;
- enum vie_cpu_mode cpu_mode;
- enum vie_paging_mode paging_mode;
- int cpl;
+ struct vm_guest_paging paging;
struct vie vie;
} inst_emul;
/*
diff --git a/sys/amd64/include/vmm_instruction_emul.h b/sys/amd64/include/vmm_instruction_emul.h
index 797cb39..1703feb 100644
--- a/sys/amd64/include/vmm_instruction_emul.h
+++ b/sys/amd64/include/vmm_instruction_emul.h
@@ -29,66 +29,6 @@
#ifndef _VMM_INSTRUCTION_EMUL_H_
#define _VMM_INSTRUCTION_EMUL_H_
-enum vm_reg_name;
-
-enum vie_cpu_mode {
- CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */
- CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */
-};
-
-enum vie_paging_mode {
- PAGING_MODE_FLAT,
- PAGING_MODE_32,
- PAGING_MODE_PAE,
- PAGING_MODE_64,
-};
-
-/*
- * The data structures 'vie' and 'vie_op' are meant to be opaque to the
- * consumers of instruction decoding. The only reason why their contents
- * need to be exposed is because they are part of the 'vm_exit' structure.
- */
-struct vie_op {
- uint8_t op_byte; /* actual opcode byte */
- uint8_t op_type; /* type of operation (e.g. MOV) */
- uint16_t op_flags;
-};
-
-#define VIE_INST_SIZE 15
-struct vie {
- uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
- uint8_t num_valid; /* size of the instruction */
- uint8_t num_processed;
-
- uint8_t rex_w:1, /* REX prefix */
- rex_r:1,
- rex_x:1,
- rex_b:1,
- rex_present:1;
-
- uint8_t mod:2, /* ModRM byte */
- reg:4,
- rm:4;
-
- uint8_t ss:2, /* SIB byte */
- index:4,
- base:4;
-
- uint8_t disp_bytes;
- uint8_t imm_bytes;
-
- uint8_t scale;
- int base_register; /* VM_REG_GUEST_xyz */
- int index_register; /* VM_REG_GUEST_xyz */
-
- int64_t displacement; /* optional addr displacement */
- int64_t immediate; /* optional immediate operand */
-
- uint8_t decoded; /* set to 1 if successfully decoded */
-
- struct vie_op op; /* opcode description */
-};
-
/*
* Callback functions to read and write memory regions.
*/
@@ -122,6 +62,9 @@ int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
int vie_alignment_check(int cpl, int operand_size, uint64_t cr0,
uint64_t rflags, uint64_t gla);
+/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */
+int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
+
uint64_t vie_size2mask(int size);
#ifdef _KERNEL
@@ -131,23 +74,22 @@ uint64_t vie_size2mask(int size);
* 'vie' must be initialized before calling 'vmm_fetch_instruction()'
*/
int vmm_fetch_instruction(struct vm *vm, int cpuid,
- uint64_t rip, int inst_length, uint64_t cr3,
- enum vie_paging_mode paging_mode, int cpl,
- struct vie *vie);
+ struct vm_guest_paging *guest_paging,
+ uint64_t rip, int inst_length, struct vie *vie);
/*
* Translate the guest linear address 'gla' to a guest physical address.
*
* Returns 0 on success and '*gpa' contains the result of the translation.
- * Returns 1 if a page fault exception was injected into the guest.
+ * Returns 1 if an exception was injected into the guest.
* Returns -1 otherwise.
*/
-int vmm_gla2gpa(struct vm *vm, int vcpuid, uint64_t gla, uint64_t cr3,
- uint64_t *gpa, enum vie_paging_mode paging_mode, int cpl, int prot);
+int vmm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa);
void vie_init(struct vie *vie);
-uint64_t vie_segbase(enum vm_reg_name segment, enum vie_cpu_mode cpu_mode,
+uint64_t vie_segbase(enum vm_reg_name segment, enum vm_cpu_mode cpu_mode,
const struct seg_desc *desc);
/*
@@ -163,7 +105,7 @@ uint64_t vie_segbase(enum vm_reg_name segment, enum vie_cpu_mode cpu_mode,
*/
#define VIE_INVALID_GLA (1UL << 63) /* a non-canonical address */
int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
- enum vie_cpu_mode cpu_mode, struct vie *vie);
+ enum vm_cpu_mode cpu_mode, struct vie *vie);
#endif /* _KERNEL */
#endif /* _VMM_INSTRUCTION_EMUL_H_ */
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 5754b22..8efb667 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
#include "vmm_host.h"
#include "vmm_ioport.h"
#include "vmm_ipi.h"
@@ -1517,7 +1518,7 @@ vmx_cpl(void)
return ((ssar >> 5) & 0x3);
}
-static enum vie_cpu_mode
+static enum vm_cpu_mode
vmx_cpu_mode(void)
{
@@ -1527,7 +1528,7 @@ vmx_cpu_mode(void)
return (CPU_MODE_COMPATIBILITY);
}
-static enum vie_paging_mode
+static enum vm_paging_mode
vmx_paging_mode(void)
{
@@ -1607,15 +1608,21 @@ inout_str_seginfo(struct vmx *vmx, int vcpuid, uint32_t inst_info, int in,
}
static void
+vmx_paging_info(struct vm_guest_paging *paging)
+{ /* fill every field of 'paging' from the vmx/vmcs accessors below */
+ paging->cr3 = vmcs_guest_cr3(); /* later used as the page table root */
+ paging->cpl = vmx_cpl();
+ paging->cpu_mode = vmx_cpu_mode();
+ paging->paging_mode = vmx_paging_mode();
+}
+
+static void
vmexit_inst_emul(struct vm_exit *vmexit, uint64_t gpa, uint64_t gla)
{
vmexit->exitcode = VM_EXITCODE_INST_EMUL;
vmexit->u.inst_emul.gpa = gpa;
vmexit->u.inst_emul.gla = gla;
- vmexit->u.inst_emul.cr3 = vmcs_guest_cr3();
- vmexit->u.inst_emul.cpu_mode = vmx_cpu_mode();
- vmexit->u.inst_emul.paging_mode = vmx_paging_mode();
- vmexit->u.inst_emul.cpl = vmx_cpl();
+ vmx_paging_info(&vmexit->u.inst_emul.paging);
}
static int
@@ -1998,12 +2005,9 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO);
vmexit->exitcode = VM_EXITCODE_INOUT_STR;
vis = &vmexit->u.inout_str;
- vis->cpu_mode = vmx_cpu_mode();
- vis->paging_mode = vmx_paging_mode();
+ vmx_paging_info(&vis->paging);
vis->rflags = vmcs_read(VMCS_GUEST_RFLAGS);
vis->cr0 = vmcs_read(VMCS_GUEST_CR0);
- vis->cr3 = vmcs_read(VMCS_GUEST_CR3);
- vis->cpl = vmx_cpl();
vis->index = inout_str_index(vmx, vcpu, in);
vis->count = inout_str_count(vmx, vcpu, vis->inout.rep);
vis->addrsize = inout_str_addrsize(inst_info);
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index f5ed0fe..8ebdfd7 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
#include "vmm_ioport.h"
#include "vmm_ktr.h"
@@ -1132,32 +1133,25 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
struct vie *vie;
struct vcpu *vcpu;
struct vm_exit *vme;
- int cpl, error, inst_length;
- uint64_t rip, gla, gpa, cr3;
- enum vie_cpu_mode cpu_mode;
- enum vie_paging_mode paging_mode;
+ uint64_t gla, gpa;
+ struct vm_guest_paging *paging;
mem_region_read_t mread;
mem_region_write_t mwrite;
+ int error;
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
- rip = vme->rip;
- inst_length = vme->inst_length;
-
gla = vme->u.inst_emul.gla;
gpa = vme->u.inst_emul.gpa;
- cr3 = vme->u.inst_emul.cr3;
- cpl = vme->u.inst_emul.cpl;
- cpu_mode = vme->u.inst_emul.cpu_mode;
- paging_mode = vme->u.inst_emul.paging_mode;
vie = &vme->u.inst_emul.vie;
+ paging = &vme->u.inst_emul.paging;
vie_init(vie);
/* Fetch, decode and emulate the faulting instruction */
- error = vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3,
- paging_mode, cpl, vie);
+ error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip,
+ vme->inst_length, vie);
if (error == 1)
return (0); /* Resume guest to handle page fault */
else if (error == -1)
@@ -1165,7 +1159,7 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
else if (error != 0)
panic("%s: vmm_fetch_instruction error %d", __func__, error);
- if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, vie) != 0)
+ if (vmm_decode_instruction(vm, vcpuid, gla, paging->cpu_mode, vie) != 0)
return (EFAULT);
/* return to userland unless this is an in-kernel emulated device */
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index cef8563..0f520d7 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#define KASSERT(exp,msg) assert((exp))
#endif /* _KERNEL */
+#include <machine/vmm_instruction_emul.h>
#include <x86/psl.h>
#include <x86/specialreg.h>
@@ -579,6 +580,25 @@ vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
return ((gla & (size - 1)) ? 1 : 0);
}
+int
+vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
+{ /* returns 1 if 'gla' is not canonical and 0 otherwise */
+ uint64_t mask;
+
+ if (cpu_mode != CPU_MODE_64BIT)
+ return (0); /* the check is only applied in 64-bit mode */
+
+ /*
+ * The value of the bit 47 in the 'gla' should be replicated in the
+ * most significant 16 bits.
+ */
+ mask = ~((1UL << 48) - 1); /* bits 63:48 (with a 64-bit unsigned long) */
+ if (gla & (1UL << 47))
+ return ((gla & mask) != mask); /* bit 47 set: bits 63:48 must all be 1 */
+ else
+ return ((gla & mask) != 0); /* bit 47 clear: bits 63:48 must all be 0 */
+}
+
uint64_t
vie_size2mask(int size)
{
@@ -637,31 +657,41 @@ ptp_hold(struct vm *vm, vm_paddr_t ptpphys, size_t len, void **cookie)
}
int
-vmm_gla2gpa(struct vm *vm, int vcpuid, uint64_t gla, uint64_t ptpphys,
- uint64_t *gpa, enum vie_paging_mode paging_mode, int cpl, int prot)
+vmm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa)
{
int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable;
u_int retries;
- uint64_t *ptpbase, pte, pgsize;
+ uint64_t *ptpbase, ptpphys, pte, pgsize;
uint32_t *ptpbase32, pte32;
void *cookie;
- usermode = (cpl == 3 ? 1 : 0);
+ usermode = (paging->cpl == 3 ? 1 : 0);
writable = prot & VM_PROT_WRITE;
cookie = NULL;
retval = 0;
retries = 0;
restart:
+ ptpphys = paging->cr3; /* root of the page tables */
ptp_release(&cookie);
if (retries++ > 0)
maybe_yield();
- if (paging_mode == PAGING_MODE_FLAT) {
+ if (vie_canonical_check(paging->cpu_mode, gla)) {
+ /*
+ * XXX assuming a non-stack reference otherwise a stack fault
+ * should be generated.
+ */
+ vm_inject_gp(vm, vcpuid);
+ goto fault;
+ }
+
+ if (paging->paging_mode == PAGING_MODE_FLAT) {
*gpa = gla;
goto done;
}
- if (paging_mode == PAGING_MODE_32) {
+ if (paging->paging_mode == PAGING_MODE_32) {
nlevels = 2;
while (--nlevels >= 0) {
/* Zero out the lower 12 bits. */
@@ -684,7 +714,7 @@ restart:
pfcode = pf_error_code(usermode, prot, 0,
pte32);
vm_inject_pf(vm, vcpuid, pfcode, gla);
- goto pagefault;
+ goto fault;
}
/*
@@ -722,7 +752,7 @@ restart:
goto done;
}
- if (paging_mode == PAGING_MODE_PAE) {
+ if (paging->paging_mode == PAGING_MODE_PAE) {
/* Zero out the lower 5 bits and the upper 32 bits */
ptpphys &= 0xffffffe0UL;
@@ -737,7 +767,7 @@ restart:
if ((pte & PG_V) == 0) {
pfcode = pf_error_code(usermode, prot, 0, pte);
vm_inject_pf(vm, vcpuid, pfcode, gla);
- goto pagefault;
+ goto fault;
}
ptpphys = pte;
@@ -764,7 +794,7 @@ restart:
(writable && (pte & PG_RW) == 0)) {
pfcode = pf_error_code(usermode, prot, 0, pte);
vm_inject_pf(vm, vcpuid, pfcode, gla);
- goto pagefault;
+ goto fault;
}
/* Set the accessed bit in the page table entry */
@@ -779,7 +809,7 @@ restart:
if (pgsize > 1 * GB) {
pfcode = pf_error_code(usermode, prot, 1, pte);
vm_inject_pf(vm, vcpuid, pfcode, gla);
- goto pagefault;
+ goto fault;
}
break;
}
@@ -802,15 +832,14 @@ done:
error:
retval = -1;
goto done;
-pagefault:
+fault:
retval = 1;
goto done;
}
int
-vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
- uint64_t cr3, enum vie_paging_mode paging_mode, int cpl,
- struct vie *vie)
+vmm_fetch_instruction(struct vm *vm, int cpuid, struct vm_guest_paging *paging,
+ uint64_t rip, int inst_length, struct vie *vie)
{
int n, error, prot;
uint64_t gpa, off;
@@ -826,8 +855,7 @@ vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
/* Copy the instruction into 'vie' */
while (vie->num_valid < inst_length) {
- error = vmm_gla2gpa(vm, cpuid, rip, cr3, &gpa, paging_mode,
- cpl, prot);
+ error = vmm_gla2gpa(vm, cpuid, paging, rip, prot, &gpa);
if (error)
return (error);
@@ -930,7 +958,7 @@ decode_opcode(struct vie *vie)
}
static int
-decode_modrm(struct vie *vie, enum vie_cpu_mode cpu_mode)
+decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode)
{
uint8_t x;
@@ -1210,7 +1238,7 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
int
vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
- enum vie_cpu_mode cpu_mode, struct vie *vie)
+ enum vm_cpu_mode cpu_mode, struct vie *vie)
{
if (cpu_mode == CPU_MODE_64BIT) {
@@ -1245,7 +1273,7 @@ vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
}
uint64_t
-vie_segbase(enum vm_reg_name seg, enum vie_cpu_mode cpu_mode,
+vie_segbase(enum vm_reg_name seg, enum vm_cpu_mode cpu_mode,
const struct seg_desc *desc)
{
int basesize;
diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c
index e28d510..f9fda2d 100644
--- a/sys/amd64/vmm/vmm_ioport.c
+++ b/sys/amd64/vmm/vmm_ioport.c
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
#include <x86/psl.h>
#include "vatpic.h"
@@ -167,9 +168,9 @@ emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
* The #GP(0) fault conditions described above don't apply in
* 64-bit mode.
*/
- if (vis->cpu_mode != CPU_MODE_64BIT) {
+ if (vis->paging.cpu_mode != CPU_MODE_64BIT) {
VCPU_CTR1(vm, vcpuid, "ins/outs not emulated in cpu mode %d",
- vis->cpu_mode);
+ vis->paging.cpu_mode);
return (EINVAL);
}
@@ -181,7 +182,8 @@ emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
return (EINVAL);
}
- segbase = vie_segbase(vis->seg_name, vis->cpu_mode, &vis->seg_desc);
+ segbase = vie_segbase(vis->seg_name, vis->paging.cpu_mode,
+ &vis->seg_desc);
index = vis->index & vie_size2mask(vis->addrsize);
gla = segbase + index;
@@ -195,8 +197,8 @@ emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
}
vis->gla = gla;
- error = vmm_gla2gpa(vm, vcpuid, gla, vis->cr3, &vis->gpa,
- vis->paging_mode, vis->cpl, in ? VM_PROT_WRITE : VM_PROT_READ);
+ error = vmm_gla2gpa(vm, vcpuid, &vis->paging, gla,
+ in ? VM_PROT_WRITE : VM_PROT_READ, &vis->gpa);
KASSERT(error == 0 || error == 1 || error == -1,
("%s: vmm_gla2gpa unexpected error %d", __func__, error));
if (error == -1) {
OpenPOWER on IntegriCloud