Diffstat (limited to 'sys/amd64/vmm/vmm_instruction_emul.c')
-rw-r--r--  sys/amd64/vmm/vmm_instruction_emul.c  359
1 file changed, 311 insertions, 48 deletions
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index f7d109c..921deb5 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>
+#include <sys/proc.h>
#include <vm/vm.h>
#include <vm/pmap.h>
@@ -46,9 +47,15 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
+#include <assert.h>
#include <vmmapi.h>
+#define KASSERT(exp,msg) assert((exp))
#endif /* _KERNEL */
+#include <machine/vmm_instruction_emul.h>
+#include <x86/psl.h>
+#include <x86/specialreg.h>
+
/* struct vie_op.op_type */
enum {
VIE_OP_TYPE_NONE = 0,
@@ -205,7 +212,7 @@ vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
return (error);
}
-static int
+int
vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
uint64_t val, int size)
{
@@ -560,6 +567,155 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
return (error);
}
+int
+vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
+{
+ KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
+ ("%s: invalid size %d", __func__, size));
+ KASSERT(cpl >= 0 && cpl <= 3, ("%s: invalid cpl %d", __func__, cpl));
+
+ if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0)
+ return (0);
+
+ return ((gla & (size - 1)) ? 1 : 0);
+}
+
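As a stand-alone illustration (not part of this commit): vie_alignment_check() only signals an alignment fault for CPL 3 accesses when both CR0.AM and RFLAGS.AC are set and the linear address is not a multiple of the operand size. The sketch below hard-codes stand-ins for the CR0_AM and PSL_AC constants normally pulled from <x86/specialreg.h> and <x86/psl.h>.

#include <stdio.h>

/* Stand-ins for CR0_AM (<x86/specialreg.h>) and PSL_AC (<x86/psl.h>). */
#define CR0_AM	0x00040000UL	/* alignment mask enable, CR0 bit 18 */
#define PSL_AC	0x00040000UL	/* alignment check flag, RFLAGS bit 18 */

/* Returns 1 if the access should raise #AC, 0 otherwise. */
static int
alignment_check(int cpl, int size, unsigned long cr0, unsigned long rflags,
    unsigned long gla)
{
	if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rflags & PSL_AC) == 0)
		return (0);
	return ((gla & (size - 1)) ? 1 : 0);
}

int
main(void)
{
	/* Misaligned 4-byte user access with AM and AC set -> faults. */
	printf("%d\n", alignment_check(3, 4, CR0_AM, PSL_AC, 0x1002));
	/* The same access from CPL 0 is never alignment-checked. */
	printf("%d\n", alignment_check(0, 4, CR0_AM, PSL_AC, 0x1002));
	return (0);
}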
+int
+vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
+{
+ uint64_t mask;
+
+ if (cpu_mode != CPU_MODE_64BIT)
+ return (0);
+
+ /*
+	 * The value of bit 47 in 'gla' should be replicated in the
+	 * most significant 16 bits.
+ */
+ mask = ~((1UL << 48) - 1);
+ if (gla & (1UL << 47))
+ return ((gla & mask) != mask);
+ else
+ return ((gla & mask) != 0);
+}
+
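The canonical check above enforces that bits 63:48 of a 64-bit linear address are a sign extension of bit 47. A stand-alone sketch of the same rule follows; note the helper here returns 1 for a canonical address, whereas vie_canonical_check() returns non-zero on a violation.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Returns 1 if 'gla' is a canonical 48-bit linear address. */
static int
is_canonical(uint64_t gla)
{
	uint64_t mask = ~((1ULL << 48) - 1);

	if (gla & (1ULL << 47))
		return ((gla & mask) == mask);
	return ((gla & mask) == 0);
}

int
main(void)
{
	printf("%d\n", is_canonical(0x00007fffffffffffULL));	/* 1: canonical */
	printf("%d\n", is_canonical(0xffff800000000000ULL));	/* 1: canonical */
	printf("%d\n", is_canonical(0x0000800000000000ULL));	/* 0: non-canonical */
	return (0);
}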
+uint64_t
+vie_size2mask(int size)
+{
+ KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
+ ("vie_size2mask: invalid size %d", size));
+ return (size2mask[size]);
+}
+
+int
+vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
+ struct seg_desc *desc, uint64_t offset, int length, int addrsize,
+ int prot, uint64_t *gla)
+{
+ uint64_t firstoff, low_limit, high_limit, segbase;
+ int glasize, type;
+
+ KASSERT(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS,
+ ("%s: invalid segment %d", __func__, seg));
+ KASSERT(length == 1 || length == 2 || length == 4 || length == 8,
+ ("%s: invalid operand size %d", __func__, length));
+ KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
+ ("%s: invalid prot %#x", __func__, prot));
+
+ firstoff = offset;
+ if (cpu_mode == CPU_MODE_64BIT) {
+ KASSERT(addrsize == 4 || addrsize == 8, ("%s: invalid address "
+ "size %d for cpu_mode %d", __func__, addrsize, cpu_mode));
+ glasize = 8;
+ } else {
+ KASSERT(addrsize == 2 || addrsize == 4, ("%s: invalid address "
+ "size %d for cpu mode %d", __func__, addrsize, cpu_mode));
+ glasize = 4;
+ /*
+ * If the segment selector is loaded with a NULL selector
+ * then the descriptor is unusable and attempting to use
+ * it results in a #GP(0).
+ */
+ if (SEG_DESC_UNUSABLE(desc))
+ return (-1);
+
+ /*
+ * The processor generates a #NP exception when a segment
+ * register is loaded with a selector that points to a
+ * descriptor that is not present. If this was the case then
+ * it would have been checked before the VM-exit.
+ */
+ KASSERT(SEG_DESC_PRESENT(desc), ("segment %d not present: %#x",
+ seg, desc->access));
+
+ /*
+ * The descriptor type must indicate a code/data segment.
+ */
+ type = SEG_DESC_TYPE(desc);
+ KASSERT(type >= 16 && type <= 31, ("segment %d has invalid "
+ "descriptor type %#x", seg, type));
+
+ if (prot & PROT_READ) {
+			/* #GP on a read access to an exec-only code segment */
+ if ((type & 0xA) == 0x8)
+ return (-1);
+ }
+
+ if (prot & PROT_WRITE) {
+ /*
+ * #GP on a write access to a code segment or a
+ * read-only data segment.
+ */
+ if (type & 0x8) /* code segment */
+ return (-1);
+
+ if ((type & 0xA) == 0) /* read-only data seg */
+ return (-1);
+ }
+
+ /*
+ * 'desc->limit' is fully expanded taking granularity into
+ * account.
+ */
+ if ((type & 0xC) == 0x4) {
+ /* expand-down data segment */
+ low_limit = desc->limit + 1;
+ high_limit = SEG_DESC_DEF32(desc) ? 0xffffffff : 0xffff;
+ } else {
+ /* code segment or expand-up data segment */
+ low_limit = 0;
+ high_limit = desc->limit;
+ }
+
+ while (length > 0) {
+ offset &= vie_size2mask(addrsize);
+ if (offset < low_limit || offset > high_limit)
+ return (-1);
+ offset++;
+ length--;
+ }
+ }
+
+ /*
+ * In 64-bit mode all segments except %fs and %gs have a segment
+ * base address of 0.
+ */
+ if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
+ seg != VM_REG_GUEST_GS) {
+ segbase = 0;
+ } else {
+ segbase = desc->base;
+ }
+
+ /*
+ * Truncate 'firstoff' to the effective address size before adding
+ * it to the segment base.
+ */
+ firstoff &= vie_size2mask(addrsize);
+ *gla = (segbase + firstoff) & vie_size2mask(glasize);
+ return (0);
+}
+
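Beyond the segmentation checks, the heart of vie_calculate_gla() is its last few lines: the offset is truncated to the effective address size, added to the segment base, and the sum is truncated to the linear address width (4 bytes outside of 64-bit mode). A stand-alone sketch with hypothetical values, using a 16-bit address size so the offset wraps before the base is applied:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Mask covering 'size' bytes worth of address bits. */
static uint64_t
size2mask(int size)
{
	return (size == 8 ? UINT64_MAX : (1ULL << (size * 8)) - 1);
}

int
main(void)
{
	uint64_t segbase = 0x00100000;	/* hypothetical segment base */
	uint64_t offset = 0x1ffff;	/* overflows a 16-bit effective address */
	int addrsize = 2, glasize = 4;
	uint64_t gla;

	/* Truncate the offset first, then truncate the base + offset sum. */
	gla = (segbase + (offset & size2mask(addrsize))) & size2mask(glasize);
	printf("gla = %#" PRIx64 "\n", gla);	/* 0x10ffff, not 0x11ffff */
	return (0);
}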
#ifdef _KERNEL
void
vie_init(struct vie *vie)
@@ -572,28 +728,86 @@ vie_init(struct vie *vie)
}
static int
-gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
- uint64_t *gpa, enum vie_paging_mode paging_mode)
+pf_error_code(int usermode, int prot, int rsvd, uint64_t pte)
+{
+ int error_code = 0;
+
+ if (pte & PG_V)
+ error_code |= PGEX_P;
+ if (prot & VM_PROT_WRITE)
+ error_code |= PGEX_W;
+ if (usermode)
+ error_code |= PGEX_U;
+ if (rsvd)
+ error_code |= PGEX_RSV;
+ if (prot & VM_PROT_EXECUTE)
+ error_code |= PGEX_I;
+
+ return (error_code);
+}
+
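pf_error_code() assembles the page-fault error code delivered to the guest: P (page was present), W (write access), U (user mode), RSV (reserved bit set) and I (instruction fetch). As a quick sketch, a user-mode write to a not-present page yields P=0, W=1, U=1, i.e. 0x6; the PGEX_* values below are assumed to match the x86 hardware encoding used by the kernel headers.

#include <stdio.h>

#define PGEX_P		0x01	/* page was present */
#define PGEX_W		0x02	/* write access */
#define PGEX_U		0x04	/* access from user mode */
#define PGEX_RSV	0x08	/* reserved PTE bit was set */
#define PGEX_I		0x10	/* instruction fetch */

int
main(void)
{
	int present = 0, write = 1, usermode = 1;
	int code = 0;

	if (present)
		code |= PGEX_P;
	if (write)
		code |= PGEX_W;
	if (usermode)
		code |= PGEX_U;
	printf("error code = %#x\n", code);	/* 0x6 */
	return (0);
}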
+static void
+ptp_release(void **cookie)
+{
+ if (*cookie != NULL) {
+ vm_gpa_release(*cookie);
+ *cookie = NULL;
+ }
+}
+
+static void *
+ptp_hold(struct vm *vm, vm_paddr_t ptpphys, size_t len, void **cookie)
{
- int nlevels, ptpshift, ptpindex;
- uint64_t *ptpbase, pte, pgsize;
+ void *ptr;
+
+ ptp_release(cookie);
+ ptr = vm_gpa_hold(vm, ptpphys, len, VM_PROT_RW, cookie);
+ return (ptr);
+}
+
+int
+vmm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa)
+{
+ int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable;
+ u_int retries;
+ uint64_t *ptpbase, ptpphys, pte, pgsize;
uint32_t *ptpbase32, pte32;
void *cookie;
- if (paging_mode == PAGING_MODE_FLAT) {
+ usermode = (paging->cpl == 3 ? 1 : 0);
+ writable = prot & VM_PROT_WRITE;
+ cookie = NULL;
+ retval = 0;
+ retries = 0;
+restart:
+ ptpphys = paging->cr3; /* root of the page tables */
+ ptp_release(&cookie);
+ if (retries++ > 0)
+ maybe_yield();
+
+ if (vie_canonical_check(paging->cpu_mode, gla)) {
+ /*
+ * XXX assuming a non-stack reference otherwise a stack fault
+ * should be generated.
+ */
+ vm_inject_gp(vm, vcpuid);
+ goto fault;
+ }
+
+ if (paging->paging_mode == PAGING_MODE_FLAT) {
*gpa = gla;
- return (0);
+ goto done;
}
- if (paging_mode == PAGING_MODE_32) {
+ if (paging->paging_mode == PAGING_MODE_32) {
nlevels = 2;
while (--nlevels >= 0) {
/* Zero out the lower 12 bits. */
ptpphys &= ~0xfff;
- ptpbase32 = vm_gpa_hold(vm, ptpphys, PAGE_SIZE,
- VM_PROT_READ, &cookie);
-
+ ptpbase32 = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);
+
if (ptpbase32 == NULL)
goto error;
@@ -603,29 +817,55 @@ gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
pte32 = ptpbase32[ptpindex];
- vm_gpa_release(cookie);
-
- if ((pte32 & PG_V) == 0)
- goto error;
+ if ((pte32 & PG_V) == 0 ||
+ (usermode && (pte32 & PG_U) == 0) ||
+ (writable && (pte32 & PG_RW) == 0)) {
+ pfcode = pf_error_code(usermode, prot, 0,
+ pte32);
+ vm_inject_pf(vm, vcpuid, pfcode, gla);
+ goto fault;
+ }
- if (pte32 & PG_PS)
+ /*
+ * Emulate the x86 MMU's management of the accessed
+ * and dirty flags. While the accessed flag is set
+ * at every level of the page table, the dirty flag
+ * is only set at the last level providing the guest
+ * physical address.
+ */
+ if ((pte32 & PG_A) == 0) {
+ if (atomic_cmpset_32(&ptpbase32[ptpindex],
+ pte32, pte32 | PG_A) == 0) {
+ goto restart;
+ }
+ }
+
+ /* XXX must be ignored if CR4.PSE=0 */
+ if (nlevels > 0 && (pte32 & PG_PS) != 0)
break;
ptpphys = pte32;
}
+ /* Set the dirty bit in the page table entry if necessary */
+ if (writable && (pte32 & PG_M) == 0) {
+ if (atomic_cmpset_32(&ptpbase32[ptpindex],
+ pte32, pte32 | PG_M) == 0) {
+ goto restart;
+ }
+ }
+
/* Zero out the lower 'ptpshift' bits */
pte32 >>= ptpshift; pte32 <<= ptpshift;
*gpa = pte32 | (gla & (pgsize - 1));
- return (0);
+ goto done;
}
- if (paging_mode == PAGING_MODE_PAE) {
- /* Zero out the lower 5 bits and the upper 12 bits */
- ptpphys >>= 5; ptpphys <<= 17; ptpphys >>= 12;
+ if (paging->paging_mode == PAGING_MODE_PAE) {
+ /* Zero out the lower 5 bits and the upper 32 bits */
+ ptpphys &= 0xffffffe0UL;
- ptpbase = vm_gpa_hold(vm, ptpphys, sizeof(*ptpbase) * 4,
- VM_PROT_READ, &cookie);
+ ptpbase = ptp_hold(vm, ptpphys, sizeof(*ptpbase) * 4, &cookie);
if (ptpbase == NULL)
goto error;
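The accessed/dirty updates in the hunk above use atomic_cmpset_32() and restart the page-table walk if the PTE changed between the read and the update. A userland sketch of that compare-and-set retry pattern, using C11 atomics in place of the kernel primitive and a hypothetical PTE value (PG_A assumed to be the x86 accessed bit, 0x20):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define PG_A	0x20u	/* x86 PTE accessed bit */

int
main(void)
{
	_Atomic uint32_t ptep = 0x1001u;	/* hypothetical valid PTE, A clear */
	uint32_t pte;

restart:
	pte = atomic_load(&ptep);
	if ((pte & PG_A) == 0) {
		/* Another CPU may have modified the PTE; retry if so. */
		if (!atomic_compare_exchange_strong(&ptep, &pte, pte | PG_A))
			goto restart;
	}
	printf("pte = %#x\n", (unsigned)atomic_load(&ptep));	/* A bit now set */
	return (0);
}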
@@ -633,10 +873,11 @@ gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
pte = ptpbase[ptpindex];
- vm_gpa_release(cookie);
-
- if ((pte & PG_V) == 0)
- goto error;
+ if ((pte & PG_V) == 0) {
+ pfcode = pf_error_code(usermode, prot, 0, pte);
+ vm_inject_pf(vm, vcpuid, pfcode, gla);
+ goto fault;
+ }
ptpphys = pte;
@@ -647,8 +888,7 @@ gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
/* Zero out the lower 12 bits and the upper 12 bits */
ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;
- ptpbase = vm_gpa_hold(vm, ptpphys, PAGE_SIZE, VM_PROT_READ,
- &cookie);
+ ptpbase = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);
if (ptpbase == NULL)
goto error;
@@ -658,36 +898,59 @@ gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
pte = ptpbase[ptpindex];
- vm_gpa_release(cookie);
+ if ((pte & PG_V) == 0 ||
+ (usermode && (pte & PG_U) == 0) ||
+ (writable && (pte & PG_RW) == 0)) {
+ pfcode = pf_error_code(usermode, prot, 0, pte);
+ vm_inject_pf(vm, vcpuid, pfcode, gla);
+ goto fault;
+ }
- if ((pte & PG_V) == 0)
- goto error;
+ /* Set the accessed bit in the page table entry */
+ if ((pte & PG_A) == 0) {
+ if (atomic_cmpset_64(&ptpbase[ptpindex],
+ pte, pte | PG_A) == 0) {
+ goto restart;
+ }
+ }
- if (pte & PG_PS) {
- if (pgsize > 1 * GB)
- goto error;
- else
- break;
+ if (nlevels > 0 && (pte & PG_PS) != 0) {
+ if (pgsize > 1 * GB) {
+ pfcode = pf_error_code(usermode, prot, 1, pte);
+ vm_inject_pf(vm, vcpuid, pfcode, gla);
+ goto fault;
+ }
+ break;
}
ptpphys = pte;
}
+ /* Set the dirty bit in the page table entry if necessary */
+ if (writable && (pte & PG_M) == 0) {
+ if (atomic_cmpset_64(&ptpbase[ptpindex], pte, pte | PG_M) == 0)
+ goto restart;
+ }
+
/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
*gpa = pte | (gla & (pgsize - 1));
- return (0);
-
+done:
+ ptp_release(&cookie);
+ return (retval);
error:
- return (-1);
+ retval = -1;
+ goto done;
+fault:
+ retval = 1;
+ goto done;
}
int
-vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
- uint64_t cr3, enum vie_paging_mode paging_mode,
- struct vie *vie)
+vmm_fetch_instruction(struct vm *vm, int cpuid, struct vm_guest_paging *paging,
+ uint64_t rip, int inst_length, struct vie *vie)
{
- int n, err, prot;
+ int n, error, prot;
uint64_t gpa, off;
void *hpa, *cookie;
@@ -701,9 +964,9 @@ vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
/* Copy the instruction into 'vie' */
while (vie->num_valid < inst_length) {
- err = gla2gpa(vm, rip, cr3, &gpa, paging_mode);
- if (err)
- break;
+ error = vmm_gla2gpa(vm, cpuid, paging, rip, prot, &gpa);
+ if (error)
+ return (error);
off = gpa & PAGE_MASK;
n = min(inst_length - vie->num_valid, PAGE_SIZE - off);
@@ -804,7 +1067,7 @@ decode_opcode(struct vie *vie)
}
static int
-decode_modrm(struct vie *vie, enum vie_cpu_mode cpu_mode)
+decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode)
{
uint8_t x;
@@ -1084,7 +1347,7 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
int
vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
- enum vie_cpu_mode cpu_mode, struct vie *vie)
+ enum vm_cpu_mode cpu_mode, struct vie *vie)
{
if (cpu_mode == CPU_MODE_64BIT) {