Diffstat (limited to 'sys/amd64/vmm/vmm.c')
-rw-r--r--  sys/amd64/vmm/vmm.c  402
1 file changed, 344 insertions, 58 deletions
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index c2a9fd1..fa0200e 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -97,6 +97,7 @@ struct vcpu {
int hostcpu; /* (o) vcpu's host cpu */
struct vlapic *vlapic; /* (i) APIC device model */
enum x2apic_state x2apic_state; /* (i) APIC mode */
+ uint64_t exitintinfo; /* (i) events pending at VM exit */
int nmi_pending; /* (i) NMI pending */
int extint_pending; /* (i) INTR pending */
struct vm_exception exception; /* (x) exception collateral */
@@ -242,6 +243,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
+ vcpu->exitintinfo = 0;
vcpu->nmi_pending = 0;
vcpu->extint_pending = 0;
vcpu->exception_pending = 0;
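
The new 'exitintinfo' field caches event-injection state that the CPU left
pending when a VM exit interrupted delivery of an interrupt or exception. It
is saved here so the event can be replayed, or merged with a newly pending
exception, on the next VM entry; vm_exit_intinfo() and vm_entry_intinfo()
below implement the two halves.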
@@ -571,6 +573,21 @@ vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
return (0);
}
+static vm_paddr_t
+vm_maxmem(struct vm *vm)
+{
+ int i;
+ vm_paddr_t gpa, maxmem;
+
+ maxmem = 0;
+ for (i = 0; i < vm->num_mem_segs; i++) {
+ gpa = vm->mem_segs[i].gpa + vm->mem_segs[i].len;
+ if (gpa > maxmem)
+ maxmem = gpa;
+ }
+ return (maxmem);
+}
+
static void
vm_gpa_unwire(struct vm *vm)
{
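
vm_maxmem() returns the highest guest physical address covered by any memory
segment; for example, with segments [0, 3GB) and [4GB, 5GB) it returns 5GB.
The IOMMU domain created below is thus sized to the guest's own address space
rather than to host memory via vmm_mem_maxaddr().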
@@ -708,7 +725,7 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
if (ppt_assigned_devices(vm) == 0) {
KASSERT(vm->iommu == NULL,
("vm_assign_pptdev: iommu must be NULL"));
- maxaddr = vmm_mem_maxaddr();
+ maxaddr = vm_maxmem(vm);
vm->iommu = iommu_create_domain(maxaddr);
error = vm_gpa_wire(vm);
@@ -1104,6 +1121,10 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
}
}
+ /* Don't go to sleep if the vcpu thread needs to yield */
+ if (vcpu_should_yield(vm, vcpuid))
+ break;
+
/*
* Some Linux guests implement "halt" by having all vcpus
* execute HLT with interrupts disabled. 'halted_cpus' keeps
@@ -1127,7 +1148,11 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
t = ticks;
vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
- msleep_spin(vcpu, &vcpu->mtx, wmesg, 0);
+ /*
+ * XXX msleep_spin() cannot be interrupted by signals so
+ * wake up periodically to check pending signals.
+ */
+ msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
vcpu_require_state_locked(vcpu, VCPU_FROZEN);
vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
}
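
Bounding the sleep at hz ticks (one second) means a halted vcpu still returns
to the loop periodically, where pending signals and the new
vcpu_should_yield() check above get a chance to run; msleep_spin() on the
vcpu's spin mutex has no signal-interruptible variant, hence the XXX
workaround.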
@@ -1191,15 +1216,18 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
struct vm_guest_paging *paging;
mem_region_read_t mread;
mem_region_write_t mwrite;
- int error;
+ enum vm_cpu_mode cpu_mode;
+ int cs_d, error;
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
gla = vme->u.inst_emul.gla;
gpa = vme->u.inst_emul.gpa;
+ cs_d = vme->u.inst_emul.cs_d;
vie = &vme->u.inst_emul.vie;
paging = &vme->u.inst_emul.paging;
+ cpu_mode = paging->cpu_mode;
vie_init(vie);
@@ -1213,7 +1241,7 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
else if (error != 0)
panic("%s: vmm_fetch_instruction error %d", __func__, error);
- if (vmm_decode_instruction(vm, vcpuid, gla, paging->cpu_mode, vie) != 0)
+ if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0)
return (EFAULT);
/* return to userland unless this is an in-kernel emulated device */
@@ -1231,8 +1259,8 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
return (0);
}
- error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
- retu);
+ error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging,
+ mread, mwrite, retu);
return (error);
}
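
The new 'cs_d' operand carries the D (default operation size) bit from the
guest's CS segment descriptor, which the decoder needs alongside cpu_mode to
size operands correctly outside 64-bit mode. A hypothetical sketch of the
kind of decision it enables (default_opsize() is illustrative, not a function
in this diff):

	/* Illustrative only: default operand size in bytes. */
	static int
	default_opsize(enum vm_cpu_mode cpu_mode, int cs_d)
	{
		if (cpu_mode == CPU_MODE_64BIT)
			return (4);	/* 32-bit default even in long mode */
		return (cs_d ? 4 : 2);	/* CS.D picks 32- vs 16-bit default */
	}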
@@ -1456,6 +1484,202 @@ restart:
}
int
+vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info)
+{
+ struct vcpu *vcpu;
+ int type, vector;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ if (info & VM_INTINFO_VALID) {
+ type = info & VM_INTINFO_TYPE;
+ vector = info & 0xff;
+ if (type == VM_INTINFO_NMI && vector != IDT_NMI)
+ return (EINVAL);
+ if (type == VM_INTINFO_HWEXCEPTION && vector >= 32)
+ return (EINVAL);
+ if (info & VM_INTINFO_RSVD)
+ return (EINVAL);
+ } else {
+ info = 0;
+ }
+ VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info);
+ vcpu->exitintinfo = info;
+ return (0);
+}
+
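
The checks above imply the 64-bit intinfo layout: vector in bits 7:0, a type
field selected by the VM_INTINFO_* constants, a deliver-error-code flag, a
valid bit, and (per vcpu_exception_intinfo() below) the error code in bits
63:32. A minimal packing sketch, assuming the VM_INTINFO_* macros from vmm.h:

	/* Sketch: pack an event the same way vcpu_exception_intinfo() does. */
	static uint64_t
	pack_intinfo(int vector, uint64_t type, int errcode_valid,
	    uint32_t errcode)
	{
		uint64_t info;

		info = VM_INTINFO_VALID | type | (vector & 0xff);
		if (errcode_valid) {
			info |= VM_INTINFO_DEL_ERRCODE;
			info |= (uint64_t)errcode << 32;
		}
		return (info);
	}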
+enum exc_class {
+ EXC_BENIGN,
+ EXC_CONTRIBUTORY,
+ EXC_PAGEFAULT
+};
+
+#define IDT_VE 20 /* Virtualization Exception (Intel specific) */
+
+static enum exc_class
+exception_class(uint64_t info)
+{
+ int type, vector;
+
+ KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info));
+ type = info & VM_INTINFO_TYPE;
+ vector = info & 0xff;
+
+ /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */
+ switch (type) {
+ case VM_INTINFO_HWINTR:
+ case VM_INTINFO_SWINTR:
+ case VM_INTINFO_NMI:
+ return (EXC_BENIGN);
+ default:
+ /*
+ * Hardware exception.
+ *
+ * SVM and VT-x use identical type values to represent NMI,
+ * hardware interrupt and software interrupt.
+ *
+ * SVM uses type '3' for all exceptions. VT-x uses type '3'
+ * for exceptions except #BP and #OF. #BP and #OF use a type
+ * value of '5' or '6'. Therefore we don't check for explicit
+ * values of 'type' to classify 'intinfo' into a hardware
+ * exception.
+ */
+ break;
+ }
+
+ switch (vector) {
+ case IDT_PF:
+ case IDT_VE:
+ return (EXC_PAGEFAULT);
+ case IDT_DE:
+ case IDT_TS:
+ case IDT_NP:
+ case IDT_SS:
+ case IDT_GP:
+ return (EXC_CONTRIBUTORY);
+ default:
+ return (EXC_BENIGN);
+ }
+}
+
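
For example, exception_class() maps #GP to EXC_CONTRIBUTORY, #PF and #VE to
EXC_PAGEFAULT, and everything else, including #BP and #OF despite their
distinct VT-x type encodings, to EXC_BENIGN.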
+static int
+nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2,
+ uint64_t *retinfo)
+{
+ enum exc_class exc1, exc2;
+ int type1, vector1;
+
+ KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1));
+ KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2));
+
+ /*
+ * If an exception occurs while attempting to call the double-fault
+ * handler the processor enters shutdown mode (aka triple fault).
+ */
+ type1 = info1 & VM_INTINFO_TYPE;
+ vector1 = info1 & 0xff;
+ if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) {
+ VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)",
+ info1, info2);
+ vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT);
+ *retinfo = 0;
+ return (0);
+ }
+
+ /*
+ * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3
+ */
+ exc1 = exception_class(info1);
+ exc2 = exception_class(info2);
+ if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) ||
+ (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) {
+ /* Convert nested fault into a double fault. */
+ *retinfo = IDT_DF;
+ *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
+ *retinfo |= VM_INTINFO_DEL_ERRCODE;
+ } else {
+ /* Handle exceptions serially */
+ *retinfo = info2;
+ }
+ return (1);
+}
+
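
Worked examples against Table 6-5: #GP followed by #GP (contributory +
contributory) collapses into a #DF with a zero error code; #PF followed by
#GP (page fault + contributory) likewise becomes a #DF; but #GP followed by
#PF is handled serially, so *retinfo is simply info2. A #DF already in info1
escalates to a triple fault, suspending the VM instead of injecting anything.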
+static uint64_t
+vcpu_exception_intinfo(struct vcpu *vcpu)
+{
+ uint64_t info = 0;
+
+ if (vcpu->exception_pending) {
+ info = vcpu->exception.vector & 0xff;
+ info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
+ if (vcpu->exception.error_code_valid) {
+ info |= VM_INTINFO_DEL_ERRCODE;
+ info |= (uint64_t)vcpu->exception.error_code << 32;
+ }
+ }
+ return (info);
+}
+
+int
+vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
+{
+ struct vcpu *vcpu;
+ uint64_t info1, info2;
+ int valid;
+
+ KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ info1 = vcpu->exitintinfo;
+ vcpu->exitintinfo = 0;
+
+ info2 = 0;
+ if (vcpu->exception_pending) {
+ info2 = vcpu_exception_intinfo(vcpu);
+ vcpu->exception_pending = 0;
+ VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx",
+ vcpu->exception.vector, info2);
+ }
+
+ if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) {
+ valid = nested_fault(vm, vcpuid, info1, info2, retinfo);
+ } else if (info1 & VM_INTINFO_VALID) {
+ *retinfo = info1;
+ valid = 1;
+ } else if (info2 & VM_INTINFO_VALID) {
+ *retinfo = info2;
+ valid = 1;
+ } else {
+ valid = 0;
+ }
+
+ if (valid) {
+ VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), "
+ "retinfo(%#lx)", __func__, info1, info2, *retinfo);
+ }
+
+ return (valid);
+}
+
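
A hypothetical sketch of the backend's VM-entry side, showing how the single
merged event would be consumed; program_event_injection() stands in for the
VT-x/SVM-specific code (e.g. writing the VM-entry interruption fields or
EVENTINJ) and is not part of this diff:

	static void
	inject_pending_event(struct vm *vm, int vcpuid)
	{
		uint64_t intinfo;

		/* Merge exit-time state and any pending exception. */
		if (vm_entry_intinfo(vm, vcpuid, &intinfo))
			program_event_injection(vcpuid, intinfo);
	}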
+int
+vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+ vcpu = &vm->vcpu[vcpuid];
+ *info1 = vcpu->exitintinfo;
+ *info2 = vcpu_exception_intinfo(vcpu);
+ return (0);
+}
+
+int
vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
{
struct vcpu *vcpu;
@@ -1466,6 +1690,14 @@ vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
if (exception->vector < 0 || exception->vector >= 32)
return (EINVAL);
+ /*
+ * A double fault exception should never be injected directly into
+ * the guest. It is a derived exception that results from specific
+ * combinations of nested faults.
+ */
+ if (exception->vector == IDT_DF)
+ return (EINVAL);
+
vcpu = &vm->vcpu[vcpuid];
if (vcpu->exception_pending) {
@@ -1481,32 +1713,21 @@ vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
return (0);
}
-int
-vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *exception)
-{
- struct vcpu *vcpu;
- int pending;
-
- KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
-
- vcpu = &vm->vcpu[vcpuid];
- pending = vcpu->exception_pending;
- if (pending) {
- vcpu->exception_pending = 0;
- *exception = vcpu->exception;
- VCPU_CTR1(vm, vcpuid, "Exception %d delivered",
- exception->vector);
- }
- return (pending);
-}
-
-static void
-vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception)
+void
+vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid,
+ int errcode)
{
+ struct vm_exception exception;
struct vm_exit *vmexit;
+ struct vm *vm;
int error;
- error = vm_inject_exception(vm, vcpuid, exception);
+ vm = vmarg;
+
+ exception.vector = vector;
+ exception.error_code = errcode;
+ exception.error_code_valid = errcode_valid;
+ error = vm_inject_exception(vm, vcpuid, &exception);
KASSERT(error == 0, ("vm_inject_exception error %d", error));
/*
@@ -1521,45 +1742,19 @@ vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception)
}
void
-vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2)
+vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2)
{
- struct vm_exception pf = {
- .vector = IDT_PF,
- .error_code_valid = 1,
- .error_code = error_code
- };
+ struct vm *vm;
int error;
+ vm = vmarg;
VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx",
error_code, cr2);
error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2);
KASSERT(error == 0, ("vm_set_register(cr2) error %d", error));
- vm_inject_fault(vm, vcpuid, &pf);
-}
-
-void
-vm_inject_gp(struct vm *vm, int vcpuid)
-{
- struct vm_exception gpf = {
- .vector = IDT_GP,
- .error_code_valid = 1,
- .error_code = 0
- };
-
- vm_inject_fault(vm, vcpuid, &gpf);
-}
-
-void
-vm_inject_ud(struct vm *vm, int vcpuid)
-{
- struct vm_exception udf = {
- .vector = IDT_UD,
- .error_code_valid = 0
- };
-
- vm_inject_fault(vm, vcpuid, &udf);
+ vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code);
}
static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
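
vm_inject_gp() and vm_inject_ud() disappear from this file, but their
behavior is recoverable from the new entry point; a sketch of equivalent
wrappers, assuming they now live as inlines elsewhere (e.g. a header):

	static __inline void
	vm_inject_gp(void *vm, int vcpuid)
	{
		/* #GP pushes an error code; the removed wrapper used 0. */
		vm_inject_fault(vm, vcpuid, IDT_GP, 1, 0);
	}

	static __inline void
	vm_inject_ud(void *vm, int vcpuid)
	{
		/* #UD has no error code. */
		vm_inject_fault(vm, vcpuid, IDT_UD, 0, 0);
	}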
@@ -1993,6 +2188,97 @@ vm_segment_name(int seg)
return (seg_names[seg]);
}
+void
+vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
+ int num_copyinfo)
+{
+ int idx;
+
+ for (idx = 0; idx < num_copyinfo; idx++) {
+ if (copyinfo[idx].cookie != NULL)
+ vm_gpa_release(copyinfo[idx].cookie);
+ }
+ bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo));
+}
+
+int
+vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+ uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
+ int num_copyinfo)
+{
+ int error, idx, nused;
+ size_t n, off, remaining;
+ void *hva, *cookie;
+ uint64_t gpa;
+
+ bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo);
+
+ nused = 0;
+ remaining = len;
+ while (remaining > 0) {
+ KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo"));
+ error = vmm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa);
+ if (error)
+ return (error);
+ off = gpa & PAGE_MASK;
+ n = min(remaining, PAGE_SIZE - off);
+ copyinfo[nused].gpa = gpa;
+ copyinfo[nused].len = n;
+ remaining -= n;
+ gla += n;
+ nused++;
+ }
+
+ for (idx = 0; idx < nused; idx++) {
+ hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len,
+ prot, &cookie);
+ if (hva == NULL)
+ break;
+ copyinfo[idx].hva = hva;
+ copyinfo[idx].cookie = cookie;
+ }
+
+ if (idx != nused) {
+ vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo);
+ return (-1);
+ } else {
+ return (0);
+ }
+}
+
+void
+vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr,
+ size_t len)
+{
+ char *dst;
+ int idx;
+
+ dst = kaddr;
+ idx = 0;
+ while (len > 0) {
+ bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len);
+ len -= copyinfo[idx].len;
+ dst += copyinfo[idx].len;
+ idx++;
+ }
+}
+
+void
+vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
+ struct vm_copyinfo *copyinfo, size_t len)
+{
+ const char *src;
+ int idx;
+
+ src = kaddr;
+ idx = 0;
+ while (len > 0) {
+ bcopy(src, copyinfo[idx].hva, copyinfo[idx].len);
+ len -= copyinfo[idx].len;
+ src += copyinfo[idx].len;
+ idx++;
+ }
+}
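
A minimal usage sketch for the copy helpers: map a guest linear range, copy
it in, release the pages. read_guest_buf() is illustrative; PROT_READ and the
two-entry copyinfo array (enough for a range crossing at most one page
boundary) are assumptions, not taken from this diff:

	static int
	read_guest_buf(struct vm *vm, int vcpuid,
	    struct vm_guest_paging *paging, uint64_t gla, void *buf,
	    size_t len)
	{
		struct vm_copyinfo copyinfo[2];
		int error;

		error = vm_copy_setup(vm, vcpuid, paging, gla, len,
		    PROT_READ, copyinfo, nitems(copyinfo));
		if (error)
			return (error);
		vm_copyin(vm, vcpuid, copyinfo, buf, len);
		vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
		return (0);
	}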
/*
* Return the amount of in-use and wired memory for the VM. Since