summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author neel <neel@FreeBSD.org> 2015-06-28 01:21:55 +0000
committer neel <neel@FreeBSD.org> 2015-06-28 01:21:55 +0000
commit c85aee0195bf01e2e4666927083768c4f182c0c0 (patch)
tree e72b32c6a72fb1e7f1b2f913718581599207091c
parent 115742fae3f7a7c52d6d5f4894f37e68dff4fd5c (diff)
download FreeBSD-src-c85aee0195bf01e2e4666927083768c4f182c0c0.zip
FreeBSD-src-c85aee0195bf01e2e4666927083768c4f182c0c0.tar.gz
MFC r279444:
Allow passthrough devices to be hinted. MFC r279683: When ICW1 is issued the edge sense circuit is reset which means that following an initialization a low-to-high transition is necessary to generate an interrupt. MFC r279925: Add -p parameter to list PCI device to pass through to the guest. MFC r281559: Fix handling of BUS_PROBE_NOWILDCARD in 'device_probe_child()'. MFC r280447: When fetching an instruction in non-64bit mode, consider the value of the code segment base address. MFC r280725: Move legacy interrupt allocation for virtio devices to common code. MFC r280775: Fix the RTC device model to operate correctly in 12-hour mode. MFC r280929: Fix "MOVS" instruction memory to MMIO emulation. MFC r280968: Display instruction bytes and %rip prior to aborting due to an instruction emulation error. MFC r281145: Enhance the support for Group 1 Extended opcodes for CMP, AND, OR instructions. MFC r281542: Initialize 'error' before use (Coverity IDs 1249748, 1249747, 1249751, 1249749) MFC r281561: Prior to aborting due to an ioport error, it is always interesting to see what the guest's %rip is. MFC r281611: If the number of guest vcpus is less than '1' then flag it as an error. MFC r281612: Prefer 'vcpu_should_yield()' over checking 'curthread->td_flags' directly. MFC r281630: Relax the check on which vectors can be delivered through the APIC. According to the Intel SDM vectors 16 through 255 are allowed to be delivered via the local APIC. MFC r281879: Missing break in switch case (Coverity ID 1292499) MFC r281946: Don't allow guest to modify readonly bits in the PCI config 'status' register. MFC r281987: STOS/STOSB/STOSW/STOSD/STOSQ instruction emulation. MFC r282206: Implement the century byte in the RTC.
-rw-r--r--lib/libvmmapi/vmmapi.c12
-rw-r--r--lib/libvmmapi/vmmapi.h2
-rwxr-xr-xshare/examples/bhyve/vmrun.sh16
-rw-r--r--sys/amd64/include/vmm.h1
-rw-r--r--sys/amd64/include/vmm_instruction_emul.h2
-rw-r--r--sys/amd64/vmm/amd/svm.c9
-rw-r--r--sys/amd64/vmm/intel/vmx.c6
-rw-r--r--sys/amd64/vmm/io/ppt.c78
-rw-r--r--sys/amd64/vmm/io/vatpic.c1
-rw-r--r--sys/amd64/vmm/io/vrtc.c113
-rw-r--r--sys/amd64/vmm/vmm.c16
-rw-r--r--sys/amd64/vmm/vmm_dev.c4
-rw-r--r--sys/amd64/vmm/vmm_instruction_emul.c286
-rw-r--r--sys/amd64/vmm/vmm_lapic.c6
-rw-r--r--sys/kern/subr_bus.c18
-rw-r--r--usr.sbin/bhyve/acpi.c2
-rw-r--r--usr.sbin/bhyve/bhyverun.c30
-rw-r--r--usr.sbin/bhyve/pci_emul.c87
-rw-r--r--usr.sbin/bhyve/pci_virtio_block.c2
-rw-r--r--usr.sbin/bhyve/pci_virtio_net.c2
-rw-r--r--usr.sbin/bhyve/virtio.c5
-rw-r--r--usr.sbin/bhyvectl/bhyvectl.c18
22 files changed, 503 insertions, 213 deletions
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
index 9828876..0c15845 100644
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -979,6 +979,18 @@ gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
return (error);
}
+int
+vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa)
+{
+ int error, fault;
+
+ error = gla2gpa(ctx, vcpu, paging, gla, prot, &fault, gpa);
+ if (fault)
+ error = fault;
+ return (error);
+}
+
#ifndef min
#define min(a,b) (((a) < (b)) ? (a) : (b))
#endif
diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
index 06b2930..d001cd8 100644
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -63,6 +63,8 @@ int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
+int vm_gla2gpa(struct vmctx *, int vcpuid, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa);
uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
void vm_set_memflags(struct vmctx *ctx, int flags);
diff --git a/share/examples/bhyve/vmrun.sh b/share/examples/bhyve/vmrun.sh
index d3d5cdf..eeaf281 100755
--- a/share/examples/bhyve/vmrun.sh
+++ b/share/examples/bhyve/vmrun.sh
@@ -62,6 +62,7 @@ usage() {
echo " -i: force boot of the Installation CDROM image"
echo " -I: Installation CDROM image location (default is ${DEFAULT_ISOFILE})"
echo " -m: memory size (default is ${DEFAULT_MEMSIZE})"
+ echo " -p: pass-through a host PCI device at bus/slot/func (e.g. 10/0/0)"
echo " -t: tap device for virtio-net (default is $DEFAULT_TAPDEV)"
echo ""
[ -n "$msg" ] && errmsg "$msg"
@@ -89,8 +90,9 @@ disk_total=0
apic_opt=""
gdbport=0
loader_opt=""
+pass_total=0
-while getopts ac:C:d:e:g:hH:iI:m:t: c ; do
+while getopts ac:C:d:e:g:hH:iI:m:p:t: c ; do
case $c in
a)
apic_opt="-a"
@@ -126,6 +128,10 @@ while getopts ac:C:d:e:g:hH:iI:m:t: c ; do
m)
memsize=${OPTARG}
;;
+ p)
+ eval "pass_dev${pass_total}=\"${OPTARG}\""
+ pass_total=$(($pass_total + 1))
+ ;;
t)
eval "tap_dev${tap_total}=\"${OPTARG}\""
tap_total=$(($tap_total + 1))
@@ -249,6 +255,14 @@ while [ 1 ]; do
i=$(($i + 1))
done
+ i=0
+ while [ $i -lt $pass_total ] ; do
+ eval "pass=\$pass_dev${i}"
+ devargs="$devargs -s $nextslot:0,passthru,${pass} "
+ nextslot=$(($nextslot + 1))
+ i=$(($i + 1))
+ done
+
${FBSDRUN} -c ${cpus} -m ${memsize} ${apic_opt} -A -H -P \
-g ${gdbport} \
-s 0:0,hostbridge \
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index cf7f5bc..52294bd 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -551,6 +551,7 @@ struct vm_exit {
struct {
uint64_t gpa;
uint64_t gla;
+ uint64_t cs_base;
int cs_d; /* CS.D */
struct vm_guest_paging paging;
struct vie vie;
diff --git a/sys/amd64/include/vmm_instruction_emul.h b/sys/amd64/include/vmm_instruction_emul.h
index 516cc01..651b3b3 100644
--- a/sys/amd64/include/vmm_instruction_emul.h
+++ b/sys/amd64/include/vmm_instruction_emul.h
@@ -90,7 +90,7 @@ int vmm_fetch_instruction(struct vm *vm, int cpuid,
* Returns 1 if an exception was injected into the guest.
* Returns -1 otherwise.
*/
-int vmm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+int vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
uint64_t gla, int prot, uint64_t *gpa);
void vie_init(struct vie *vie, const char *inst_bytes, int inst_length);
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index 88a846d..7cc13ca 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -799,8 +799,14 @@ svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit)
KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error));
switch(paging->cpu_mode) {
+ case CPU_MODE_REAL:
+ vmexit->u.inst_emul.cs_base = seg.base;
+ vmexit->u.inst_emul.cs_d = 0;
+ break;
case CPU_MODE_PROTECTED:
case CPU_MODE_COMPATIBILITY:
+ vmexit->u.inst_emul.cs_base = seg.base;
+
/*
* Section 4.8.1 of APM2, Default Operand Size or D bit.
*/
@@ -808,6 +814,7 @@ svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit)
1 : 0;
break;
default:
+ vmexit->u.inst_emul.cs_base = 0;
vmexit->u.inst_emul.cs_d = 0;
break;
}
@@ -1911,7 +1918,7 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
}
/* We are asked to give the cpu by scheduler. */
- if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) {
+ if (vcpu_should_yield(vm, vcpu)) {
enable_gintr();
vm_exit_astpending(vm, vcpu, state->rip);
break;
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index b81e48b..9aa55e2 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -1785,12 +1785,18 @@ vmexit_inst_emul(struct vm_exit *vmexit, uint64_t gpa, uint64_t gla)
vmexit->u.inst_emul.gla = gla;
vmx_paging_info(paging);
switch (paging->cpu_mode) {
+ case CPU_MODE_REAL:
+ vmexit->u.inst_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE);
+ vmexit->u.inst_emul.cs_d = 0;
+ break;
case CPU_MODE_PROTECTED:
case CPU_MODE_COMPATIBILITY:
+ vmexit->u.inst_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE);
csar = vmcs_read(VMCS_GUEST_CS_ACCESS_RIGHTS);
vmexit->u.inst_emul.cs_d = SEG_DESC_DEF32(csar);
break;
default:
+ vmexit->u.inst_emul.cs_base = 0;
vmexit->u.inst_emul.cs_d = 0;
break;
}
diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c
index fa7083e..b789f77 100644
--- a/sys/amd64/vmm/io/ppt.c
+++ b/sys/amd64/vmm/io/ppt.c
@@ -56,7 +56,6 @@ __FBSDID("$FreeBSD$");
/* XXX locking */
-#define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0]))
#define MAX_MSIMSGS 32
/*
@@ -77,9 +76,10 @@ struct pptintr_arg { /* pptintr(pptintr_arg) */
uint64_t msg_data;
};
-static struct pptdev {
+struct pptdev {
device_t dev;
struct vm *vm; /* owner of this device */
+ TAILQ_ENTRY(pptdev) next;
struct vm_memory_segment mmio[MAX_MMIOSEGS];
struct {
int num_msgs; /* guest state */
@@ -99,7 +99,7 @@ static struct pptdev {
void **cookie;
struct pptintr_arg *arg;
} msix;
-} pptdevs[64];
+};
SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW, 0, "bhyve passthru devices");
@@ -108,6 +108,8 @@ static int num_pptdevs;
SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0,
"number of pci passthru devices");
+static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list);
+
static int
ppt_probe(device_t dev)
{
@@ -125,26 +127,30 @@ ppt_probe(device_t dev)
* - be allowed by administrator to be used in this role
* - be an endpoint device
*/
- if (vmm_is_pptdev(bus, slot, func) &&
- (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL)
+ if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
+ return (ENXIO);
+ else if (vmm_is_pptdev(bus, slot, func))
return (0);
else
- return (ENXIO);
+ /*
+ * Returning BUS_PROBE_NOWILDCARD here matches devices that the
+ * SR-IOV infrastructure specified as "ppt" passthrough devices.
+ * All normal devices that did not have "ppt" specified as their
+ * driver will not be matched by this.
+ */
+ return (BUS_PROBE_NOWILDCARD);
}
static int
ppt_attach(device_t dev)
{
- int n;
+ struct pptdev *ppt;
- if (num_pptdevs >= MAX_PPTDEVS) {
- printf("ppt_attach: maximum number of pci passthrough devices "
- "exceeded\n");
- return (ENXIO);
- }
+ ppt = device_get_softc(dev);
- n = num_pptdevs++;
- pptdevs[n].dev = dev;
+ num_pptdevs++;
+ TAILQ_INSERT_TAIL(&pptdev_list, ppt, next);
+ ppt->dev = dev;
if (bootverbose)
device_printf(dev, "attached\n");
@@ -155,10 +161,14 @@ ppt_attach(device_t dev)
static int
ppt_detach(device_t dev)
{
- /*
- * XXX check whether there are any pci passthrough devices assigned
- * to guests before we allow this driver to detach.
- */
+ struct pptdev *ppt;
+
+ ppt = device_get_softc(dev);
+
+ if (ppt->vm != NULL)
+ return (EBUSY);
+ num_pptdevs--;
+ TAILQ_REMOVE(&pptdev_list, ppt, next);
return (0);
}
@@ -172,22 +182,23 @@ static device_method_t ppt_methods[] = {
};
static devclass_t ppt_devclass;
-DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
+DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev));
DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
static struct pptdev *
ppt_find(int bus, int slot, int func)
{
device_t dev;
- int i, b, s, f;
+ struct pptdev *ppt;
+ int b, s, f;
- for (i = 0; i < num_pptdevs; i++) {
- dev = pptdevs[i].dev;
+ TAILQ_FOREACH(ppt, &pptdev_list, next) {
+ dev = ppt->dev;
b = pci_get_bus(dev);
s = pci_get_slot(dev);
f = pci_get_function(dev);
if (bus == b && slot == s && func == f)
- return (&pptdevs[i]);
+ return (ppt);
}
return (NULL);
}
@@ -297,11 +308,12 @@ ppt_avail_devices(void)
int
ppt_assigned_devices(struct vm *vm)
{
- int i, num;
+ struct pptdev *ppt;
+ int num;
num = 0;
- for (i = 0; i < num_pptdevs; i++) {
- if (pptdevs[i].vm == vm)
+ TAILQ_FOREACH(ppt, &pptdev_list, next) {
+ if (ppt->vm == vm)
num++;
}
return (num);
@@ -310,12 +322,11 @@ ppt_assigned_devices(struct vm *vm)
boolean_t
ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
{
- int i, n;
+ int i;
struct pptdev *ppt;
struct vm_memory_segment *seg;
- for (n = 0; n < num_pptdevs; n++) {
- ppt = &pptdevs[n];
+ TAILQ_FOREACH(ppt, &pptdev_list, next) {
if (ppt->vm != vm)
continue;
@@ -377,12 +388,13 @@ ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
int
ppt_unassign_all(struct vm *vm)
{
- int i, bus, slot, func;
+ struct pptdev *ppt;
+ int bus, slot, func;
device_t dev;
- for (i = 0; i < num_pptdevs; i++) {
- if (pptdevs[i].vm == vm) {
- dev = pptdevs[i].dev;
+ TAILQ_FOREACH(ppt, &pptdev_list, next) {
+ if (ppt->vm == vm) {
+ dev = ppt->dev;
bus = pci_get_bus(dev);
slot = pci_get_slot(dev);
func = pci_get_function(dev);
diff --git a/sys/amd64/vmm/io/vatpic.c b/sys/amd64/vmm/io/vatpic.c
index 328c35f..0df6e7c 100644
--- a/sys/amd64/vmm/io/vatpic.c
+++ b/sys/amd64/vmm/io/vatpic.c
@@ -275,6 +275,7 @@ vatpic_icw1(struct vatpic *vatpic, struct atpic *atpic, uint8_t val)
atpic->ready = false;
atpic->icw_num = 1;
+ atpic->request = 0;
atpic->mask = 0;
atpic->lowprio = 7;
atpic->rd_cmd_reg = 0;
diff --git a/sys/amd64/vmm/io/vrtc.c b/sys/amd64/vmm/io/vrtc.c
index d5e93dc..9d406c1 100644
--- a/sys/amd64/vmm/io/vrtc.c
+++ b/sys/amd64/vmm/io/vrtc.c
@@ -63,9 +63,12 @@ struct rtcdev {
uint8_t reg_b;
uint8_t reg_c;
uint8_t reg_d;
- uint8_t nvram[128 - 14];
+ uint8_t nvram[36];
+ uint8_t century;
+ uint8_t nvram2[128 - 51];
} __packed;
CTASSERT(sizeof(struct rtcdev) == 128);
+CTASSERT(offsetof(struct rtcdev, century) == RTC_CENTURY);
struct vrtc {
struct vm *vm;
@@ -214,9 +217,27 @@ secs_to_rtc(time_t rtctime, struct vrtc *vrtc, int force_update)
rtc->sec = rtcset(rtc, ct.sec);
rtc->min = rtcset(rtc, ct.min);
- hour = ct.hour;
- if ((rtc->reg_b & RTCSB_24HR) == 0)
- hour = (hour % 12) + 1; /* convert to a 12-hour format */
+ if (rtc->reg_b & RTCSB_24HR) {
+ hour = ct.hour;
+ } else {
+ /*
+ * Convert to the 12-hour format.
+ */
+ switch (ct.hour) {
+ case 0: /* 12 AM */
+ case 12: /* 12 PM */
+ hour = 12;
+ break;
+ default:
+ /*
+ * The remaining 'ct.hour' values are interpreted as:
+ * [1 - 11] -> 1 - 11 AM
+ * [13 - 23] -> 1 - 11 PM
+ */
+ hour = ct.hour % 12;
+ break;
+ }
+ }
rtc->hour = rtcset(rtc, hour);
@@ -227,6 +248,7 @@ secs_to_rtc(time_t rtctime, struct vrtc *vrtc, int force_update)
rtc->day_of_month = rtcset(rtc, ct.day);
rtc->month = rtcset(rtc, ct.mon);
rtc->year = rtcset(rtc, ct.year % 100);
+ rtc->century = rtcset(rtc, ct.year / 100);
}
static int
@@ -256,7 +278,7 @@ rtc_to_secs(struct vrtc *vrtc)
struct timespec ts;
struct rtcdev *rtc;
struct vm *vm;
- int error, hour, pm, year;
+ int century, error, hour, pm, year;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
@@ -287,9 +309,26 @@ rtc_to_secs(struct vrtc *vrtc)
}
error = rtcget(rtc, hour, &ct.hour);
if ((rtc->reg_b & RTCSB_24HR) == 0) {
- ct.hour -= 1;
- if (pm)
- ct.hour += 12;
+ if (ct.hour >= 1 && ct.hour <= 12) {
+ /*
+ * Convert from 12-hour format to internal 24-hour
+ * representation as follows:
+ *
+ * 12-hour format ct.hour
+ * 12 AM 0
+ * 1 - 11 AM 1 - 11
+ * 12 PM 12
+ * 1 - 11 PM 13 - 23
+ */
+ if (ct.hour == 12)
+ ct.hour = 0;
+ if (pm)
+ ct.hour += 12;
+ } else {
+ VM_CTR2(vm, "Invalid RTC 12-hour format %#x/%d",
+ rtc->hour, ct.hour);
+ goto fail;
+ }
}
if (error || ct.hour < 0 || ct.hour > 23) {
@@ -323,10 +362,14 @@ rtc_to_secs(struct vrtc *vrtc)
VM_CTR2(vm, "Invalid RTC year %#x/%d", rtc->year, year);
goto fail;
}
- if (year >= 70)
- ct.year = 1900 + year;
- else
- ct.year = 2000 + year;
+
+ error = rtcget(rtc, rtc->century, &century);
+ ct.year = century * 100 + year;
+ if (error || ct.year < POSIX_BASE_YEAR) {
+ VM_CTR2(vm, "Invalid RTC century %#x/%d", rtc->century,
+ ct.year);
+ goto fail;
+ }
error = clock_ct_to_ts(&ct, &ts);
if (error || ts.tv_sec < 0) {
@@ -338,7 +381,12 @@ rtc_to_secs(struct vrtc *vrtc)
}
return (ts.tv_sec); /* success */
fail:
- return (VRTC_BROKEN_TIME); /* failure */
+ /*
+ * Stop updating the RTC if the date/time fields programmed by
+ * the guest are invalid.
+ */
+ VM_CTR0(vrtc->vm, "Invalid RTC date/time programming detected");
+ return (VRTC_BROKEN_TIME);
}
static int
@@ -593,13 +641,6 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
if ((newval & RTCSB_HALT) == 0) {
rtctime = rtc_to_secs(vrtc);
if (rtctime == VRTC_BROKEN_TIME) {
- /*
- * Stop updating the RTC if the date/time
- * programmed by the guest is not correct.
- */
- VM_CTR0(vrtc->vm, "Invalid RTC date/time "
- "programming detected");
-
if (rtc_flag_broken_time)
return (-1);
}
@@ -742,7 +783,7 @@ vrtc_nvram_write(struct vm *vm, int offset, uint8_t value)
* Don't allow writes to RTC control registers or the date/time fields.
*/
if (offset < offsetof(struct rtcdev, nvram[0]) ||
- offset >= sizeof(struct rtcdev)) {
+ offset == RTC_CENTURY || offset >= sizeof(struct rtcdev)) {
VM_CTR1(vrtc->vm, "RTC nvram write to invalid offset %d",
offset);
return (EINVAL);
@@ -776,7 +817,7 @@ vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
/*
* Update RTC date/time fields if necessary.
*/
- if (offset < 10) {
+ if (offset < 10 || offset == RTC_CENTURY) {
curtime = vrtc_curtime(vrtc);
secs_to_rtc(curtime, vrtc, 0);
}
@@ -837,13 +878,17 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
curtime = vrtc_curtime(vrtc);
vrtc_time_update(vrtc, curtime);
- if (in) {
- /*
- * Update RTC date/time fields if necessary.
- */
- if (offset < 10)
- secs_to_rtc(curtime, vrtc, 0);
+ /*
+ * Update RTC date/time fields if necessary.
+ *
+ * This is not just for reads of the RTC. The side-effect of writing
+ * the century byte requires other RTC date/time fields (e.g. sec)
+ * to be updated here.
+ */
+ if (offset < 10 || offset == RTC_CENTURY)
+ secs_to_rtc(curtime, vrtc, 0);
+ if (in) {
if (offset == 12) {
/*
* XXX
@@ -887,6 +932,18 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
*((uint8_t *)rtc + offset) = *val;
break;
}
+
+ /*
+ * XXX some guests (e.g. OpenBSD) write the century byte
+ * outside of RTCSB_HALT so re-calculate the RTC date/time.
+ */
+ if (offset == RTC_CENTURY && !rtc_halted(vrtc)) {
+ curtime = rtc_to_secs(vrtc);
+ error = vrtc_time_update(vrtc, curtime);
+ KASSERT(!error, ("vrtc_time_update error %d", error));
+ if (curtime == VRTC_BROKEN_TIME && rtc_flag_broken_time)
+ error = -1;
+ }
}
VRTC_UNLOCK(vrtc);
return (error);
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 7f90c61..0e78272 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -218,6 +218,11 @@ SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
&trace_guest_exceptions, 0,
"Trap into hypervisor on all guest exceptions and reflect them back");
+static int vmm_force_iommu = 0;
+TUNABLE_INT("hw.vmm.force_iommu", &vmm_force_iommu);
+SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0,
+ "Force use of I/O MMU even if no passthrough devices were found.");
+
static void
vcpu_cleanup(struct vm *vm, int i, bool destroy)
{
@@ -322,7 +327,7 @@ vmm_handler(module_t mod, int what, void *arg)
switch (what) {
case MOD_LOAD:
vmmdev_init();
- if (ppt_avail_devices() > 0)
+ if (vmm_force_iommu || ppt_avail_devices() > 0)
iommu_init();
error = vmm_init();
if (error == 0)
@@ -1248,7 +1253,7 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
struct vie *vie;
struct vcpu *vcpu;
struct vm_exit *vme;
- uint64_t gla, gpa;
+ uint64_t gla, gpa, cs_base;
struct vm_guest_paging *paging;
mem_region_read_t mread;
mem_region_write_t mwrite;
@@ -1260,6 +1265,7 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
gla = vme->u.inst_emul.gla;
gpa = vme->u.inst_emul.gpa;
+ cs_base = vme->u.inst_emul.cs_base;
cs_d = vme->u.inst_emul.cs_d;
vie = &vme->u.inst_emul.vie;
paging = &vme->u.inst_emul.paging;
@@ -1274,8 +1280,8 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
* maximum size instruction.
*/
length = vme->inst_length ? vme->inst_length : VIE_INST_SIZE;
- error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip,
- length, vie);
+ error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip +
+ cs_base, length, vie);
} else {
/*
* The instruction bytes have already been copied into 'vie'
@@ -2328,7 +2334,7 @@ vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
remaining = len;
while (remaining > 0) {
KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo"));
- error = vmm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa);
+ error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa);
if (error)
return (error);
off = gpa & PAGE_MASK;
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index 0293d191..5be99cb 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -440,10 +440,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
CTASSERT(PROT_WRITE == VM_PROT_WRITE);
CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
gg = (struct vm_gla2gpa *)data;
- error = vmm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
+ error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
gg->prot, &gg->gpa);
KASSERT(error == 0 || error == 1 || error == -1,
- ("%s: vmm_gla2gpa unknown error %d", __func__, error));
+ ("%s: vm_gla2gpa unknown error %d", __func__, error));
if (error >= 0) {
/*
* error = 0: the translation was successful
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 3db890e..6f75515 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -71,6 +71,8 @@ enum {
VIE_OP_TYPE_CMP,
VIE_OP_TYPE_POP,
VIE_OP_TYPE_MOVS,
+ VIE_OP_TYPE_GROUP1,
+ VIE_OP_TYPE_STOS,
VIE_OP_TYPE_LAST
};
@@ -145,6 +147,16 @@ static const struct vie_op one_byte_opcodes[256] = {
.op_type = VIE_OP_TYPE_MOVS,
.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
},
+ [0xAA] = {
+ .op_byte = 0xAA,
+ .op_type = VIE_OP_TYPE_STOS,
+ .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+ },
+ [0xAB] = {
+ .op_byte = 0xAB,
+ .op_type = VIE_OP_TYPE_STOS,
+ .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+ },
[0xC6] = {
/* XXX Group 11 extended opcode - not just MOV */
.op_byte = 0xC6,
@@ -161,15 +173,15 @@ static const struct vie_op one_byte_opcodes[256] = {
.op_type = VIE_OP_TYPE_AND,
},
[0x81] = {
- /* XXX Group 1 extended opcode - not just AND */
+ /* XXX Group 1 extended opcode */
.op_byte = 0x81,
- .op_type = VIE_OP_TYPE_AND,
+ .op_type = VIE_OP_TYPE_GROUP1,
.op_flags = VIE_OP_F_IMM,
},
[0x83] = {
- /* XXX Group 1 extended opcode - not just OR */
+ /* XXX Group 1 extended opcode */
.op_byte = 0x83,
- .op_type = VIE_OP_TYPE_OR,
+ .op_type = VIE_OP_TYPE_GROUP1,
.op_flags = VIE_OP_F_IMM8,
},
[0x8F] = {
@@ -634,7 +646,7 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
#else
struct iovec copyinfo[2];
#endif
- uint64_t dstaddr, srcaddr, val;
+ uint64_t dstaddr, srcaddr, dstgpa, srcgpa, val;
uint64_t rcx, rdi, rsi, rflags;
int error, opsize, seg, repeat;
@@ -669,7 +681,7 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* (1) memory memory n/a
* (2) memory mmio emulated
* (3) mmio memory emulated
- * (4) mmio mmio not emulated
+ * (4) mmio mmio emulated
*
* At this point we don't have sufficient information to distinguish
* between (2), (3) and (4). We use 'vm_copy_setup()' to tease this
@@ -694,7 +706,8 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
vm_copyin(vm, vcpuid, copyinfo, &val, opsize);
vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
- goto done;
+ if (error)
+ goto done;
} else if (error > 0) {
/*
* Resume guest execution to handle fault.
@@ -705,37 +718,55 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* 'vm_copy_setup()' is expected to fail for cases (3) and (4)
* if 'srcaddr' is in the mmio space.
*/
- }
- error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
- PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr);
- if (error)
- goto done;
-
- error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize,
- PROT_WRITE, copyinfo, nitems(copyinfo));
- if (error == 0) {
- /*
- * case (3): read from MMIO and write to system memory.
- *
- * A MMIO read can have side-effects so we commit to it
- * only after vm_copy_setup() is successful. If a page-fault
- * needs to be injected into the guest then it will happen
- * before the MMIO read is attempted.
- */
- error = memread(vm, vcpuid, gpa, &val, opsize, arg);
+ error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
+ PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr);
if (error)
goto done;
- vm_copyout(vm, vcpuid, &val, copyinfo, opsize);
- vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
- } else if (error > 0) {
- /*
- * Resume guest execution to handle fault.
- */
- goto done;
- } else {
- goto done;
+ error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize,
+ PROT_WRITE, copyinfo, nitems(copyinfo));
+ if (error == 0) {
+ /*
+ * case (3): read from MMIO and write to system memory.
+ *
+ * A MMIO read can have side-effects so we
+ * commit to it only after vm_copy_setup() is
+ * successful. If a page-fault needs to be
+ * injected into the guest then it will happen
+ * before the MMIO read is attempted.
+ */
+ error = memread(vm, vcpuid, gpa, &val, opsize, arg);
+ if (error)
+ goto done;
+
+ vm_copyout(vm, vcpuid, &val, copyinfo, opsize);
+ vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+ } else if (error > 0) {
+ /*
+ * Resume guest execution to handle fault.
+ */
+ goto done;
+ } else {
+ /*
+ * Case (4): read from and write to mmio.
+ */
+ error = vm_gla2gpa(vm, vcpuid, paging, srcaddr,
+ PROT_READ, &srcgpa);
+ if (error)
+ goto done;
+ error = memread(vm, vcpuid, srcgpa, &val, opsize, arg);
+ if (error)
+ goto done;
+
+ error = vm_gla2gpa(vm, vcpuid, paging, dstaddr,
+ PROT_WRITE, &dstgpa);
+ if (error)
+ goto done;
+ error = memwrite(vm, vcpuid, dstgpa, val, opsize, arg);
+ if (error)
+ goto done;
+ }
}
error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi);
@@ -783,6 +814,68 @@ done:
}
static int
+emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *arg)
+{
+ int error, opsize, repeat;
+ uint64_t val;
+ uint64_t rcx, rdi, rflags;
+
+ opsize = (vie->op.op_byte == 0xAA) ? 1 : vie->opsize;
+ repeat = vie->repz_present | vie->repnz_present;
+
+ if (repeat) {
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
+ KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
+
+ /*
+ * The count register is %rcx, %ecx or %cx depending on the
+ * address size of the instruction.
+ */
+ if ((rcx & vie_size2mask(vie->addrsize)) == 0)
+ return (0);
+ }
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
+ KASSERT(!error, ("%s: error %d getting rax", __func__, error));
+
+ error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
+ if (error)
+ return (error);
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
+ KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+ if (rflags & PSL_D)
+ rdi -= opsize;
+ else
+ rdi += opsize;
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi,
+ vie->addrsize);
+ KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
+
+ if (repeat) {
+ rcx = rcx - 1;
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
+ rcx, vie->addrsize);
+ KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
+
+ /*
+ * Repeat the instruction if the count register is not zero.
+ */
+ if ((rcx & vie_size2mask(vie->addrsize)) != 0)
+ vm_restart_instruction(vm, vcpuid);
+ }
+
+ return (0);
+}
+
+static int
emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
@@ -820,16 +913,18 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
error = vie_update_register(vm, vcpuid, reg, result, size);
break;
case 0x81:
+ case 0x83:
/*
- * AND/OR mem (ModRM:r/m) with immediate and store the
+ * AND mem (ModRM:r/m) with immediate and store the
* result in mem.
*
- * AND: i = 4
- * OR: i = 1
- * 81 /i op r/m16, imm16
- * 81 /i op r/m32, imm32
- * REX.W + 81 /i op r/m64, imm32 sign-extended to 64
+ * 81 /4 and r/m16, imm16
+ * 81 /4 and r/m32, imm32
+ * REX.W + 81 /4 and r/m64, imm32 sign-extended to 64
*
+ * 83 /4 and r/m16, imm8 sign-extended to 16
+ * 83 /4 and r/m32, imm8 sign-extended to 32
+ * REX.W + 83/4 and r/m64, imm8 sign-extended to 64
*/
/* get the first operand */
@@ -838,26 +933,11 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
break;
/*
- * perform the operation with the pre-fetched immediate
- * operand and write the result
- */
- switch (vie->reg & 7) {
- case 0x4:
- /* modrm:reg == b100, AND */
- result = val1 & vie->immediate;
- break;
- case 0x1:
- /* modrm:reg == b001, OR */
- result = val1 | vie->immediate;
- break;
- default:
- error = EINVAL;
- break;
- }
- if (error)
- break;
-
- error = memwrite(vm, vcpuid, gpa, result, size, arg);
+ * perform the operation with the pre-fetched immediate
+ * operand and write the result
+ */
+ result = val1 & vie->immediate;
+ error = memwrite(vm, vcpuid, gpa, result, size, arg);
break;
default:
break;
@@ -894,20 +974,20 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
error = EINVAL;
switch (vie->op.op_byte) {
+ case 0x81:
case 0x83:
/*
* OR mem (ModRM:r/m) with immediate and store the
* result in mem.
*
- * 83 /1 OR r/m16, imm8 sign-extended to 16
- * 83 /1 OR r/m32, imm8 sign-extended to 32
- * REX.W + 83/1 OR r/m64, imm8 sign-extended to 64
+ * 81 /1 or r/m16, imm16
+ * 81 /1 or r/m32, imm32
+ * REX.W + 81 /1 or r/m64, imm32 sign-extended to 64
*
- * Currently, only the OR operation of the 0x83 opcode
- * is implemented (ModRM:reg = b001).
+ * 83 /1 or r/m16, imm8 sign-extended to 16
+ * 83 /1 or r/m32, imm8 sign-extended to 32
+ * REX.W + 83/1 or r/m64, imm8 sign-extended to 64
*/
- if ((vie->reg & 7) != 1)
- break;
/* get the first operand */
error = memread(vm, vcpuid, gpa, &val1, size, arg);
@@ -978,11 +1058,37 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if (error)
return (error);
+ rflags2 = getcc(size, op1, op2);
+ break;
+ case 0x81:
+ case 0x83:
+ /*
+ * 81 /7 cmp r/m16, imm16
+ * 81 /7 cmp r/m32, imm32
+ * REX.W + 81 /7 cmp r/m64, imm32 sign-extended to 64
+ *
+ * 83 /7 cmp r/m16, imm8 sign-extended to 16
+ * 83 /7 cmp r/m32, imm8 sign-extended to 32
+ * REX.W + 83 /7 cmp r/m64, imm8 sign-extended to 64
+ *
+ * Compare mem (ModRM:r/m) with immediate and set
+ * status flags according to the results. The
+ * comparison is performed by subtracting the
+ * immediate from the first operand and then setting
+ * the status flags.
+ *
+ */
+
+ /* get the first operand */
+ error = memread(vm, vcpuid, gpa, &op1, size, arg);
+ if (error)
+ return (error);
+
+ rflags2 = getcc(size, op1, vie->immediate);
break;
default:
return (EINVAL);
}
- rflags2 = getcc(size, op1, op2);
error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
if (error)
return (error);
@@ -1201,6 +1307,34 @@ emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
return (error);
}
+static int
+emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *memarg)
+{
+ int error;
+
+ switch (vie->reg & 7) {
+ case 0x1: /* OR */
+ error = emulate_or(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
+ case 0x4: /* AND */
+ error = emulate_and(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
+ case 0x7: /* CMP */
+ error = emulate_cmp(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
+
int
vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
struct vm_guest_paging *paging, mem_region_read_t memread,
@@ -1212,6 +1346,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
return (EINVAL);
switch (vie->op.op_type) {
+ case VIE_OP_TYPE_GROUP1:
+ error = emulate_group1(vm, vcpuid, gpa, vie, paging, memread,
+ memwrite, memarg);
+ break;
case VIE_OP_TYPE_POP:
error = emulate_pop(vm, vcpuid, gpa, vie, paging, memread,
memwrite, memarg);
@@ -1237,6 +1375,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread,
memwrite, memarg);
break;
+ case VIE_OP_TYPE_STOS:
+ error = emulate_stos(vm, vcpuid, gpa, vie, paging, memread,
+ memwrite, memarg);
+ break;
case VIE_OP_TYPE_AND:
error = emulate_and(vm, vcpuid, gpa, vie,
memread, memwrite, memarg);
@@ -1465,7 +1607,7 @@ ptp_hold(struct vm *vm, vm_paddr_t ptpphys, size_t len, void **cookie)
}
int
-vmm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
uint64_t gla, int prot, uint64_t *gpa)
{
int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable;
@@ -1825,12 +1967,12 @@ decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode)
{
uint8_t x;
- if (cpu_mode == CPU_MODE_REAL)
- return (-1);
-
if (vie->op.op_flags & VIE_OP_F_NO_MODRM)
return (0);
+ if (cpu_mode == CPU_MODE_REAL)
+ return (-1);
+
if (vie_peek(vie, &x))
return (-1);
diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c
index fa9832e..f06948b 100644
--- a/sys/amd64/vmm/vmm_lapic.c
+++ b/sys/amd64/vmm/vmm_lapic.c
@@ -58,7 +58,11 @@ lapic_set_intr(struct vm *vm, int cpu, int vector, bool level)
if (cpu < 0 || cpu >= VM_MAXCPU)
return (EINVAL);
- if (vector < 32 || vector > 255)
+ /*
+ * According to section "Maskable Hardware Interrupts" in Intel SDM
+ * vectors 16 through 255 can be delivered through the local APIC.
+ */
+ if (vector < 16 || vector > 255)
return (EINVAL);
vlapic = vm_lapic(vm, cpu);
diff --git a/sys/kern/subr_bus.c b/sys/kern/subr_bus.c
index fa058ed..bb6806c 100644
--- a/sys/kern/subr_bus.c
+++ b/sys/kern/subr_bus.c
@@ -2116,6 +2116,16 @@ device_probe_child(device_t dev, device_t child)
}
/*
+ * Probes that return BUS_PROBE_NOWILDCARD or lower
+ * only match on devices whose driver was explicitly
+ * specified.
+ */
+ if (result <= BUS_PROBE_NOWILDCARD &&
+ !(child->flags & DF_FIXEDCLASS)) {
+ result = ENXIO;
+ }
+
+ /*
* The driver returned an error so it
* certainly doesn't match.
*/
@@ -2130,14 +2140,6 @@ device_probe_child(device_t dev, device_t child)
* of pri for the first match.
*/
if (best == NULL || result > pri) {
- /*
- * Probes that return BUS_PROBE_NOWILDCARD
- * or lower only match on devices whose
- * driver was explicitly specified.
- */
- if (result <= BUS_PROBE_NOWILDCARD &&
- !(child->flags & DF_FIXEDCLASS))
- continue;
best = dl;
pri = result;
continue;
diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c
index a5a6559..a9dd1cc 100644
--- a/usr.sbin/bhyve/acpi.c
+++ b/usr.sbin/bhyve/acpi.c
@@ -386,7 +386,7 @@ basl_fwrite_fadt(FILE *fp)
EFPRINTF(fp, "[0001]\t\tDuty Cycle Width : 00\n");
EFPRINTF(fp, "[0001]\t\tRTC Day Alarm Index : 00\n");
EFPRINTF(fp, "[0001]\t\tRTC Month Alarm Index : 00\n");
- EFPRINTF(fp, "[0001]\t\tRTC Century Index : 00\n");
+ EFPRINTF(fp, "[0001]\t\tRTC Century Index : 32\n");
EFPRINTF(fp, "[0002]\t\tBoot Flags (decoded below) : 0000\n");
EFPRINTF(fp, "\t\t\tLegacy Devices Supported (V2) : 0\n");
EFPRINTF(fp, "\t\t\t8042 Present on ports 60/64 (V2) : 0\n");
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 97ed046..47a7699 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -325,8 +325,10 @@ vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
error = emulate_inout(ctx, vcpu, vme, strictio);
if (error) {
- fprintf(stderr, "Unhandled %s%c 0x%04x\n", in ? "in" : "out",
- bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
+ fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
+ in ? "in" : "out",
+ bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
+ port, vme->rip); /* %rip of the exit being handled ('vme') */
return (VMEXIT_ABORT);
} else {
return (VMEXIT_CONTINUE);
@@ -495,22 +497,27 @@ vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
static int
vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
- int err;
+ int err, i;
+ struct vie *vie;
+
stats.vmexit_inst_emul++;
+ vie = &vmexit->u.inst_emul.vie;
err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
- &vmexit->u.inst_emul.vie, &vmexit->u.inst_emul.paging);
+ vie, &vmexit->u.inst_emul.paging);
if (err) {
- if (err == EINVAL) {
- fprintf(stderr,
- "Failed to emulate instruction at 0x%lx\n",
- vmexit->rip);
- } else if (err == ESRCH) {
+ if (err == ESRCH) {
fprintf(stderr, "Unhandled memory access to 0x%lx\n",
vmexit->u.inst_emul.gpa);
}
+ fprintf(stderr, "Failed to emulate instruction [");
+ for (i = 0; i < vie->num_valid; i++) {
+ fprintf(stderr, "0x%02x%s", vie->inst[i],
+ i != (vie->num_valid - 1) ? " " : "");
+ }
+ fprintf(stderr, "] at 0x%lx\n", vmexit->rip);
return (VMEXIT_ABORT);
}
@@ -798,6 +805,11 @@ main(int argc, char *argv[])
exit(1);
}
+ if (guest_ncpus < 1) {
+ fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
+ exit(1);
+ }
+
max_vcpus = num_vcpus_allowed(ctx);
if (guest_ncpus > max_vcpus) {
fprintf(stderr, "%d vCPUs requested but only %d available\n",
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
index 6b906ed..a08ac4c 100644
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -59,17 +59,6 @@ __FBSDID("$FreeBSD$");
#define CONF1_ENABLE 0x80000000ul
-#define CFGWRITE(pi,off,val,b) \
-do { \
- if ((b) == 1) { \
- pci_set_cfgdata8((pi),(off),(val)); \
- } else if ((b) == 2) { \
- pci_set_cfgdata16((pi),(off),(val)); \
- } else { \
- pci_set_cfgdata32((pi),(off),(val)); \
- } \
-} while (0)
-
#define MAXBUSES (PCI_BUSMAX + 1)
#define MAXSLOTS (PCI_SLOTMAX + 1)
#define MAXFUNCS (PCI_FUNCMAX + 1)
@@ -124,6 +113,30 @@ static void pci_lintr_update(struct pci_devinst *pi);
static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot,
int func, int coff, int bytes, uint32_t *val);
+static __inline void
+CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes)
+{
+ /* Store 'val' at config offset 'coff': 1 or 2 bytes, otherwise 4. */
+ if (bytes == 1)
+ pci_set_cfgdata8(pi, coff, val);
+ else if (bytes == 2)
+ pci_set_cfgdata16(pi, coff, val);
+ else
+ pci_set_cfgdata32(pi, coff, val);
+}
+
+static __inline uint32_t
+CFGREAD(struct pci_devinst *pi, int coff, int bytes)
+{
+ /* Return the config value at 'coff': 1 or 2 bytes, otherwise 4. */
+ if (bytes == 1)
+ return (pci_get_cfgdata8(pi, coff));
+ else if (bytes == 2)
+ return (pci_get_cfgdata16(pi, coff));
+ else
+ return (pci_get_cfgdata32(pi, coff));
+}
+
/*
* I/O access
*/
@@ -1653,27 +1666,31 @@ pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
}
}
-static uint32_t
-bits_changed(uint32_t old, uint32_t new, uint32_t mask)
-{
-
- return ((old ^ new) & mask);
-}
-
static void
-pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes)
+pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes)
{
- int i;
- uint16_t old;
+ int i, rshift;
+ uint32_t cmd, cmd2, changed, old, readonly;
+
+ cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */
/*
- * The command register is at an offset of 4 bytes and thus the
- * guest could write 1, 2 or 4 bytes starting at this offset.
+ * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3.
+ *
+ * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are
+ * 'write 1 to clear'. However these bits are not set to '1' by
+ * any device emulation so it is simpler to treat them as readonly.
*/
+ rshift = (coff & 0x3) * 8;
+ readonly = 0xFFFFF880 >> rshift;
+
+ old = CFGREAD(pi, coff, bytes);
+ new &= ~readonly;
+ new |= (old & readonly);
+ CFGWRITE(pi, coff, new, bytes); /* update config */
- old = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */
- CFGWRITE(pi, PCIR_COMMAND, new, bytes); /* update config */
- new = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */
+ cmd2 = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */
+ changed = cmd ^ cmd2;
/*
* If the MMIO or I/O address space decoding has changed then
@@ -1686,7 +1703,7 @@ pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes)
break;
case PCIBAR_IO:
/* I/O address space decoding changed? */
- if (bits_changed(old, new, PCIM_CMD_PORTEN)) {
+ if (changed & PCIM_CMD_PORTEN) {
if (porten(pi))
register_bar(pi, i);
else
@@ -1696,7 +1713,7 @@ pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes)
case PCIBAR_MEM32:
case PCIBAR_MEM64:
/* MMIO address space decoding changed? */
- if (bits_changed(old, new, PCIM_CMD_MEMEN)) {
+ if (changed & PCIM_CMD_MEMEN) {
if (memen(pi))
register_bar(pi, i);
else
@@ -1776,14 +1793,8 @@ pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func,
needcfg = 1;
}
- if (needcfg) {
- if (bytes == 1)
- *eax = pci_get_cfgdata8(pi, coff);
- else if (bytes == 2)
- *eax = pci_get_cfgdata16(pi, coff);
- else
- *eax = pci_get_cfgdata32(pi, coff);
- }
+ if (needcfg)
+ *eax = CFGREAD(pi, coff, bytes);
pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax);
} else {
@@ -1853,8 +1864,8 @@ pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func,
} else if (pci_emul_iscap(pi, coff)) {
pci_emul_capwrite(pi, coff, bytes, *eax);
- } else if (coff == PCIR_COMMAND) {
- pci_emul_cmdwrite(pi, *eax, bytes);
+ } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) {
+ pci_emul_cmdsts_write(pi, coff, *eax, bytes);
} else {
CFGWRITE(pi, coff, *eax, bytes);
}
diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c
index 595faa0..35daa1f 100644
--- a/usr.sbin/bhyve/pci_virtio_block.c
+++ b/usr.sbin/bhyve/pci_virtio_block.c
@@ -371,8 +371,6 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
- pci_lintr_request(pi);
-
if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
blockif_close(sc->bc);
free(sc);
diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c
index 5a9b068..1a029d5 100644
--- a/usr.sbin/bhyve/pci_virtio_net.c
+++ b/usr.sbin/bhyve/pci_virtio_net.c
@@ -641,8 +641,6 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
- pci_lintr_request(pi);
-
/* Link is up if we managed to open tap device. */
sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0);
diff --git a/usr.sbin/bhyve/virtio.c b/usr.sbin/bhyve/virtio.c
index e985805..41a9e42 100644
--- a/usr.sbin/bhyve/virtio.c
+++ b/usr.sbin/bhyve/virtio.c
@@ -148,8 +148,13 @@ vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix)
return (1);
} else
vs->vs_flags &= ~VIRTIO_USE_MSIX;
+
/* Only 1 MSI vector for bhyve */
pci_emul_add_msicap(vs->vs_pi, 1);
+
+ /* Legacy interrupts are mandatory for virtio devices */
+ pci_lintr_request(vs->vs_pi);
+
return (0);
}
diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c
index e2b514d..223ee25 100644
--- a/usr.sbin/bhyvectl/bhyvectl.c
+++ b/usr.sbin/bhyvectl/bhyvectl.c
@@ -640,9 +640,9 @@ get_all_registers(struct vmctx *ctx, int vcpu)
uint64_t cr0, cr3, cr4, dr7, rsp, rip, rflags, efer;
uint64_t rax, rbx, rcx, rdx, rsi, rdi, rbp;
uint64_t r8, r9, r10, r11, r12, r13, r14, r15;
- int error;
+ int error = 0;
- if (get_efer || get_all) {
+ if (!error && (get_efer || get_all)) {
error = vm_get_register(ctx, vcpu, VM_REG_GUEST_EFER, &efer);
if (error == 0)
printf("efer[%d]\t\t0x%016lx\n", vcpu, efer);
@@ -787,10 +787,10 @@ get_all_registers(struct vmctx *ctx, int vcpu)
static int
get_all_segments(struct vmctx *ctx, int vcpu)
{
- int error;
uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
+ int error = 0;
- if (get_desc_ds || get_all) {
+ if (!error && (get_desc_ds || get_all)) {
error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_DS,
&desc_base, &desc_limit, &desc_access);
if (error == 0) {
@@ -935,9 +935,9 @@ static int
get_misc_vmcs(struct vmctx *ctx, int vcpu)
{
uint64_t ctl, cr0, cr3, cr4, rsp, rip, pat, addr, u64;
- int error;
-
- if (get_cr0_mask || get_all) {
+ int error = 0;
+
+ if (!error && (get_cr0_mask || get_all)) {
uint64_t cr0mask;
error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_MASK, &cr0mask);
if (error == 0)
@@ -1161,9 +1161,9 @@ static int
get_misc_vmcb(struct vmctx *ctx, int vcpu)
{
uint64_t ctl, addr;
- int error;
+ int error = 0;
- if (get_vmcb_intercept || get_all) {
+ if (!error && (get_vmcb_intercept || get_all)) {
error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_CR_INTERCEPT, 4,
&ctl);
if (error == 0)
OpenPOWER on IntegriCloud