summaryrefslogtreecommitdiffstats
path: root/usr.sbin
diff options
context:
space:
mode:
authorneel <neel@FreeBSD.org>2015-06-28 03:22:26 +0000
committerneel <neel@FreeBSD.org>2015-06-28 03:22:26 +0000
commit79b96fdbcb5c0722d7886ea1fe175b7b7e4d751f (patch)
tree8cff46bd40a19bfc6a7b9831b2f3db23c5c757c0 /usr.sbin
parentc85aee0195bf01e2e4666927083768c4f182c0c0 (diff)
downloadFreeBSD-src-79b96fdbcb5c0722d7886ea1fe175b7b7e4d751f.zip
FreeBSD-src-79b96fdbcb5c0722d7886ea1fe175b7b7e4d751f.tar.gz
MFC r282209:
Emulate the 'bit test' instruction. MFC r282259: Re-implement RTC current time calculation to eliminate the possibility of losing time. MFC r282281: Advertise the MTRR feature via CPUID and emulate the minimal set of MTRR MSRs. MFC r282284: When an instruction cannot be decoded just return to userspace so bhyve(8) can dump the instruction bytes. MFC r282287: Don't require <sys/cpuset.h> to be always included before <machine/vmm.h>. MFC r282296: Emulate MSR_SYSCFG which is accessed by Linux on AMD cpus when MTRRs are enabled. MFC r282301: Relax limits when transitioning a vector from the IRR to the ISR and also when extinguishing it from the ISR in response to an EOI. MFC r282335: Advertise an additional memory BAR in the "dummy" device emulation. MFC r282336: Emulate machine check related MSRs to allow guest OSes like Windows to boot. MFC r282351: Don't advertise the Intel SMX capability to the guest. MFC r282407: Emulate the 'CMP r/m8, imm8' instruction. MFC r282519: Add macros for AMD-specific bits in MSR_EFER: LMSLE, FFXSR and TCE. MFC r282520: Emulate guest writes to EFER_MSR properly. MFC r282558: Deprecate the 3-way return values from vm_gla2gpa() and vm_copy_setup(). MFC r282571: Check 'td_owepreempt' and yield the vcpu thread if it is set. MFC r282595: Allow byte reads of AHCI registers. MFC r282784: Handling indirect descriptors is a capability of the host and not one that needs to be negotiated. Use the host capabilities field and not the negotiated field when verifying that indirect descriptors are supported. MFC r282788: Allow configuration of the sector size advertised to the guest. MFC r282865: Set the subvendor field in config space to the vendor ID. This is required by the Windows virtio drivers to correctly match a device. MFC r282922: Bump the size of the blockif scatter-gather list to 67. MFC r283075: Fix off-by-one in array index bounds check. bhyveload would allow you to create 33 entries on an array that only has 32 slots MFC r283168: Temporarily revert r282922 which bumped the max descriptors. MFC r283255: Emulate the "CMP r/m, reg" instruction (opcode 39H). MFC r283256: Add an option "--get-vmcs-exit-inst-length" to display the instruction length of the instruction that caused the VM-exit. MFC r283264: Change the header type of the emulated host-bridge from type 1 to type 0. MFC r283293: Don't rely on the 'VM-exit instruction length' field in the VMCS to always have an accurate length on an EPT violation. MFC r283299: Remove bogus verification of instruction length after instruction decode. MFC r283308: Exceptions don't deliver an error code in real mode. MFC r283657: Fix non-deterministic delays when accessing a vcpu that was in "running" or "sleeping" state. MFC r283973: Use tunable 'hw.vmm.svm.features' to disable specific SVM features even though they might be available in hardware. Use tunable 'hw.vmm.svm.num_asids' to limit the number of ASIDs used by the hypervisor. MFC r284046: Fix regression in 'verify_gla()' with the RIP-relative addressing mode. MFC r284174: Support guest writes to the TSC by enabling the "use TSC offsetting" execution control.
Diffstat (limited to 'usr.sbin')
-rw-r--r--usr.sbin/bhyve/bhyve.821
-rw-r--r--usr.sbin/bhyve/bhyverun.c14
-rw-r--r--usr.sbin/bhyve/block_if.c74
-rw-r--r--usr.sbin/bhyve/inout.c8
-rw-r--r--usr.sbin/bhyve/pci_ahci.c13
-rw-r--r--usr.sbin/bhyve/pci_emul.c38
-rw-r--r--usr.sbin/bhyve/pci_hostbridge.c2
-rw-r--r--usr.sbin/bhyve/pci_virtio_block.c1
-rw-r--r--usr.sbin/bhyve/pci_virtio_net.c1
-rw-r--r--usr.sbin/bhyve/pci_virtio_rnd.c1
-rw-r--r--usr.sbin/bhyve/task_switch.c151
-rw-r--r--usr.sbin/bhyve/virtio.c2
-rw-r--r--usr.sbin/bhyvectl/bhyvectl.c13
-rw-r--r--usr.sbin/bhyveload/bhyveload.c2
14 files changed, 213 insertions, 128 deletions
diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8
index e15f9ac..6294e67 100644
--- a/usr.sbin/bhyve/bhyve.8
+++ b/usr.sbin/bhyve/bhyve.8
@@ -193,8 +193,13 @@ format.
.Pp
Block storage devices:
.Bl -tag -width 10n
-.It Pa /filename Ns Oo , Ns Li nocache Oc Ns Oo , Ns Li direct Oc Ns Oo , Ns Li ro Oc
-.It Pa /dev/xxx Ns Oo , Ns Ar nocache Oc Ns Oo , Ns Ar direct Oc Ns Oo , Ns Ar ro Oc
+.It Pa /filename Ns Oo , Ns Ar block-device-options Oc
+.It Pa /dev/xxx Ns Oo , Ns Ar block-device-options Oc
+.El
+.Pp
+The
+.Ar block-device-options
+are:
.Bl -tag -width 8n
.It Li nocache
Open the file with
@@ -204,14 +209,10 @@ Open the file using
.Dv O_SYNC .
.It Li ro
Force the file to be opened read-only.
-.El
-.Pp
-The
-.Li nocache ,
-.Li direct ,
-and
-.Li ro
-options are not available for virtio block devices.
+.It Li sectorsize= Ns Ar logical Ns Oo / Ns Ar physical Oc
+Specify the logical and physical sector sizes of the emulated disk.
+The physical sector size is optional and is equal to the logical sector size
+if not explicitly specified.
.El
.Pp
TTY devices:
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 47a7699..ee0f106 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -100,7 +100,7 @@ static struct vm_exit vmexit[VM_MAXCPU];
struct bhyvestats {
uint64_t vmexit_bogus;
- uint64_t vmexit_bogus_switch;
+ uint64_t vmexit_reqidle;
uint64_t vmexit_hlt;
uint64_t vmexit_pause;
uint64_t vmexit_mtrap;
@@ -461,6 +461,17 @@ vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
}
static int
+vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+
+ assert(vmexit->inst_length == 0);
+
+ stats.vmexit_reqidle++;
+
+ return (VMEXIT_CONTINUE);
+}
+
+static int
vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
@@ -571,6 +582,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_VMX] = vmexit_vmx,
[VM_EXITCODE_SVM] = vmexit_svm,
[VM_EXITCODE_BOGUS] = vmexit_bogus,
+ [VM_EXITCODE_REQIDLE] = vmexit_reqidle,
[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
[VM_EXITCODE_MTRAP] = vmexit_mtrap,
diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c
index bcb1617..ef8e11e 100644
--- a/usr.sbin/bhyve/block_if.c
+++ b/usr.sbin/bhyve/block_if.c
@@ -392,16 +392,18 @@ blockif_open(const char *optstr, const char *ident)
{
char tname[MAXCOMLEN + 1];
char name[MAXPATHLEN];
- char *nopt, *xopts;
+ char *nopt, *xopts, *cp;
struct blockif_ctxt *bc;
struct stat sbuf;
struct diocgattr_arg arg;
off_t size, psectsz, psectoff;
int extra, fd, i, sectsz;
- int nocache, sync, ro, candelete, geom;
+ int nocache, sync, ro, candelete, geom, ssopt, pssopt;
pthread_once(&blockif_once, blockif_init);
+ fd = -1;
+ ssopt = 0;
nocache = 0;
sync = 0;
ro = 0;
@@ -410,16 +412,25 @@ blockif_open(const char *optstr, const char *ident)
* The first element in the optstring is always a pathname.
* Optional elements follow
*/
- nopt = strdup(optstr);
- for (xopts = strtok(nopt, ",");
- xopts != NULL;
- xopts = strtok(NULL, ",")) {
- if (!strcmp(xopts, "nocache"))
+ nopt = xopts = strdup(optstr);
+ while (xopts != NULL) {
+ cp = strsep(&xopts, ",");
+ if (cp == nopt) /* file or device pathname */
+ continue;
+ else if (!strcmp(cp, "nocache"))
nocache = 1;
- else if (!strcmp(xopts, "sync"))
+ else if (!strcmp(cp, "sync") || !strcmp(cp, "direct"))
sync = 1;
- else if (!strcmp(xopts, "ro"))
+ else if (!strcmp(cp, "ro"))
ro = 1;
+ else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2)
+ ;
+ else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1)
+ pssopt = ssopt;
+ else {
+ fprintf(stderr, "Invalid device option \"%s\"\n", cp);
+ goto err;
+ }
}
extra = 0;
@@ -437,13 +448,12 @@ blockif_open(const char *optstr, const char *ident)
if (fd < 0) {
perror("Could not open backing file");
- return (NULL);
+ goto err;
}
if (fstat(fd, &sbuf) < 0) {
perror("Could not stat backing file");
- close(fd);
- return (NULL);
+ goto err;
}
/*
@@ -457,8 +467,7 @@ blockif_open(const char *optstr, const char *ident)
if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
perror("Could not fetch dev blk/sector size");
- close(fd);
- return (NULL);
+ goto err;
}
assert(size != 0);
assert(sectsz != 0);
@@ -473,10 +482,39 @@ blockif_open(const char *optstr, const char *ident)
} else
psectsz = sbuf.st_blksize;
+ if (ssopt != 0) {
+ if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 ||
+ ssopt > pssopt) {
+ fprintf(stderr, "Invalid sector size %d/%d\n",
+ ssopt, pssopt);
+ goto err;
+ }
+
+ /*
+ * Some backend drivers (e.g. cd0, ada0) require that the I/O
+ * size be a multiple of the device's sector size.
+ *
+ * Validate that the emulated sector size complies with this
+ * requirement.
+ */
+ if (S_ISCHR(sbuf.st_mode)) {
+ if (ssopt < sectsz || (ssopt % sectsz) != 0) {
+ fprintf(stderr, "Sector size %d incompatible "
+ "with underlying device sector size %d\n",
+ ssopt, sectsz);
+ goto err;
+ }
+ }
+
+ sectsz = ssopt;
+ psectsz = pssopt;
+ psectoff = 0;
+ }
+
bc = calloc(1, sizeof(struct blockif_ctxt));
if (bc == NULL) {
- close(fd);
- return (NULL);
+ perror("calloc");
+ goto err;
}
bc->bc_magic = BLOCKIF_SIG;
@@ -506,6 +544,10 @@ blockif_open(const char *optstr, const char *ident)
}
return (bc);
+err:
+ if (fd >= 0)
+ close(fd);
+ return (NULL);
}
static int
diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c
index 402b953..929bb3c 100644
--- a/usr.sbin/bhyve/inout.c
+++ b/usr.sbin/bhyve/inout.c
@@ -107,7 +107,7 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
uint32_t eax, val;
inout_func_t handler;
void *arg;
- int error, retval;
+ int error, fault, retval;
enum vm_reg_name idxreg;
uint64_t gla, index, iterations, count;
struct vm_inout_str *vis;
@@ -163,11 +163,11 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
}
error = vm_copy_setup(ctx, vcpu, &vis->paging, gla,
- bytes, prot, iov, nitems(iov));
- if (error == -1) {
+ bytes, prot, iov, nitems(iov), &fault);
+ if (error) {
retval = -1; /* Unrecoverable error */
break;
- } else if (error == 1) {
+ } else if (fault) {
retval = 0; /* Resume guest to handle fault */
break;
}
diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c
index 31e02f8..35a0859 100644
--- a/usr.sbin/bhyve/pci_ahci.c
+++ b/usr.sbin/bhyve/pci_ahci.c
@@ -2093,7 +2093,7 @@ pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
struct pci_ahci_softc *sc = pi->pi_arg;
assert(baridx == 5);
- assert(size == 4);
+ assert((offset % 4) == 0 && size == 4);
pthread_mutex_lock(&sc->mtx);
@@ -2182,24 +2182,29 @@ pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
static uint64_t
pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
- uint64_t offset, int size)
+ uint64_t regoff, int size)
{
struct pci_ahci_softc *sc = pi->pi_arg;
+ uint64_t offset;
uint32_t value;
assert(baridx == 5);
- assert(size == 4);
+ assert(size == 1 || size == 2 || size == 4);
+ assert((regoff & (size - 1)) == 0);
pthread_mutex_lock(&sc->mtx);
+ offset = regoff & ~0x3; /* round down to a multiple of 4 bytes */
if (offset < AHCI_OFFSET)
value = pci_ahci_host_read(sc, offset);
else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
value = pci_ahci_port_read(sc, offset);
else {
value = 0;
- WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
+ WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n",
+ regoff);
}
+ value >>= 8 * (regoff & 0x3);
pthread_mutex_unlock(&sc->mtx);
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
index a08ac4c..03ff0c0 100644
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -1938,7 +1938,7 @@ INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
#define DMEMSZ 4096
struct pci_emul_dsoftc {
uint8_t ioregs[DIOSZ];
- uint8_t memregs[DMEMSZ];
+ uint8_t memregs[2][DMEMSZ];
};
#define PCI_EMUL_MSI_MSGS 4
@@ -1967,6 +1967,9 @@ pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ);
assert(error == 0);
+ error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ);
+ assert(error == 0);
+
return (0);
}
@@ -2006,21 +2009,23 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
}
}
- if (baridx == 1) {
+ if (baridx == 1 || baridx == 2) {
if (offset + size > DMEMSZ) {
printf("diow: memw too large, offset %ld size %d\n",
offset, size);
return;
}
+ i = baridx - 1; /* 'memregs' index */
+
if (size == 1) {
- sc->memregs[offset] = value;
+ sc->memregs[i][offset] = value;
} else if (size == 2) {
- *(uint16_t *)&sc->memregs[offset] = value;
+ *(uint16_t *)&sc->memregs[i][offset] = value;
} else if (size == 4) {
- *(uint32_t *)&sc->memregs[offset] = value;
+ *(uint32_t *)&sc->memregs[i][offset] = value;
} else if (size == 8) {
- *(uint64_t *)&sc->memregs[offset] = value;
+ *(uint64_t *)&sc->memregs[i][offset] = value;
} else {
printf("diow: memw unknown size %d\n", size);
}
@@ -2030,7 +2035,7 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
*/
}
- if (baridx > 1) {
+ if (baridx > 2) {
printf("diow: unknown bar idx %d\n", baridx);
}
}
@@ -2041,6 +2046,7 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
{
struct pci_emul_dsoftc *sc = pi->pi_arg;
uint32_t value;
+ int i;
if (baridx == 0) {
if (offset + size > DIOSZ) {
@@ -2059,29 +2065,31 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
printf("dior: ior unknown size %d\n", size);
}
}
-
- if (baridx == 1) {
+
+ if (baridx == 1 || baridx == 2) {
if (offset + size > DMEMSZ) {
printf("dior: memr too large, offset %ld size %d\n",
offset, size);
return (0);
}
-
+
+ i = baridx - 1; /* 'memregs' index */
+
if (size == 1) {
- value = sc->memregs[offset];
+ value = sc->memregs[i][offset];
} else if (size == 2) {
- value = *(uint16_t *) &sc->memregs[offset];
+ value = *(uint16_t *) &sc->memregs[i][offset];
} else if (size == 4) {
- value = *(uint32_t *) &sc->memregs[offset];
+ value = *(uint32_t *) &sc->memregs[i][offset];
} else if (size == 8) {
- value = *(uint64_t *) &sc->memregs[offset];
+ value = *(uint64_t *) &sc->memregs[i][offset];
} else {
printf("dior: ior unknown size %d\n", size);
}
}
- if (baridx > 1) {
+ if (baridx > 2) {
printf("dior: unknown bar idx %d\n", baridx);
return (0);
}
diff --git a/usr.sbin/bhyve/pci_hostbridge.c b/usr.sbin/bhyve/pci_hostbridge.c
index 54a25ae..5c9ea28 100644
--- a/usr.sbin/bhyve/pci_hostbridge.c
+++ b/usr.sbin/bhyve/pci_hostbridge.c
@@ -38,7 +38,7 @@ pci_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
/* config space */
pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1275); /* NetApp */
pci_set_cfgdata16(pi, PCIR_DEVICE, 0x1275); /* NetApp */
- pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_BRIDGE);
+ pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL);
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_HOST);
diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c
index 35daa1f..8500be6 100644
--- a/usr.sbin/bhyve/pci_virtio_block.c
+++ b/usr.sbin/bhyve/pci_virtio_block.c
@@ -370,6 +370,7 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
+ pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
blockif_close(sc->bc);
diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c
index 1a029d5..3781ea9 100644
--- a/usr.sbin/bhyve/pci_virtio_net.c
+++ b/usr.sbin/bhyve/pci_virtio_net.c
@@ -640,6 +640,7 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
+ pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
/* Link is up if we managed to open tap device. */
sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0);
diff --git a/usr.sbin/bhyve/pci_virtio_rnd.c b/usr.sbin/bhyve/pci_virtio_rnd.c
index 92d1d6f..78448f5 100644
--- a/usr.sbin/bhyve/pci_virtio_rnd.c
+++ b/usr.sbin/bhyve/pci_virtio_rnd.c
@@ -170,6 +170,7 @@ pci_vtrnd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_CRYPTO);
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_ENTROPY);
+ pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
if (vi_intr_init(&sc->vrsc_vs, 1, fbsdrun_virtio_msix()))
return (1);
diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c
index ba6a9d2..69dfaae 100644
--- a/usr.sbin/bhyve/task_switch.c
+++ b/usr.sbin/bhyve/task_switch.c
@@ -202,7 +202,8 @@ desc_table_limit_check(struct vmctx *ctx, int vcpu, uint16_t sel)
*/
static int
desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
- uint16_t sel, struct user_segment_descriptor *desc, bool doread)
+ uint16_t sel, struct user_segment_descriptor *desc, bool doread,
+ int *faultptr)
{
struct iovec iov[2];
uint64_t base;
@@ -215,28 +216,30 @@ desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
assert(limit >= SEL_LIMIT(sel));
error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel),
- sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov));
- if (error == 0) {
- if (doread)
- vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc));
- else
- vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc));
- }
- return (error);
+ sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov),
+ faultptr);
+ if (error || *faultptr)
+ return (error);
+
+ if (doread)
+ vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc));
+ else
+ vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc));
+ return (0);
}
static int
desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
- uint16_t sel, struct user_segment_descriptor *desc)
+ uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
- return (desc_table_rw(ctx, vcpu, paging, sel, desc, true));
+ return (desc_table_rw(ctx, vcpu, paging, sel, desc, true, faultptr));
}
static int
desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
- uint16_t sel, struct user_segment_descriptor *desc)
+ uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
- return (desc_table_rw(ctx, vcpu, paging, sel, desc, false));
+ return (desc_table_rw(ctx, vcpu, paging, sel, desc, false, faultptr));
}
/*
@@ -248,7 +251,7 @@ desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
*/
static int
read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
- uint16_t sel, struct user_segment_descriptor *desc)
+ uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
struct vm_guest_paging sup_paging;
int error;
@@ -267,7 +270,7 @@ read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
sup_paging = ts->paging;
sup_paging.cpl = 0; /* implicit supervisor mode */
- error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc);
+ error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc, faultptr);
return (error);
}
@@ -301,14 +304,10 @@ ldt_desc(int sd_type)
/*
* Validate the descriptor 'seg_desc' associated with 'segment'.
- *
- * Returns 0 on success.
- * Returns 1 if an exception was injected into the guest.
- * Returns -1 otherwise.
*/
static int
validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
- int segment, struct seg_desc *seg_desc)
+ int segment, struct seg_desc *seg_desc, int *faultptr)
{
struct vm_guest_paging sup_paging;
struct user_segment_descriptor usd;
@@ -369,8 +368,8 @@ validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
/* Read the descriptor from the GDT/LDT */
sup_paging = ts->paging;
sup_paging.cpl = 0; /* implicit supervisor mode */
- error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd);
- if (error)
+ error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd, faultptr);
+ if (error || *faultptr)
return (error);
/* Verify that the descriptor type is compatible with the segment */
@@ -476,14 +475,10 @@ update_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *sd)
/*
* Update the vcpu registers to reflect the state of the new task.
- *
- * Returns 0 on success.
- * Returns 1 if an exception was injected into the guest.
- * Returns -1 otherwise.
*/
static int
tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
- uint16_t ot_sel, struct tss32 *tss, struct iovec *iov)
+ uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr)
{
struct seg_desc seg_desc, seg_desc2;
uint64_t *pdpte, maxphyaddr, reserved;
@@ -565,8 +560,9 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss));
/* Validate segment descriptors */
- error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc);
- if (error)
+ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc,
+ faultptr);
+ if (error || *faultptr)
return (error);
update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc);
@@ -579,33 +575,40 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
* VM-entry checks so the guest can handle any exception injected
* during task switch emulation.
*/
- error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc);
- if (error)
+ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc,
+ faultptr);
+ if (error || *faultptr)
return (error);
- error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2);
- if (error)
+
+ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2,
+ faultptr);
+ if (error || *faultptr)
return (error);
update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc);
update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2);
ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK;
- error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc);
- if (error)
+ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc,
+ faultptr);
+ if (error || *faultptr)
return (error);
update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc);
- error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc);
- if (error)
+ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc,
+ faultptr);
+ if (error || *faultptr)
return (error);
update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc);
- error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc);
- if (error)
+ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc,
+ faultptr);
+ if (error || *faultptr)
return (error);
update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc);
- error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc);
- if (error)
+ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc,
+ faultptr);
+ if (error || *faultptr)
return (error);
update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc);
@@ -616,14 +619,10 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
* Push an error code on the stack of the new task. This is needed if the
* task switch was triggered by a hardware exception that causes an error
* code to be saved (e.g. #PF).
- *
- * Returns 0 on success.
- * Returns 1 if an exception was injected into the guest.
- * Returns -1 otherwise.
*/
static int
push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
- int task_type, uint32_t errcode)
+ int task_type, uint32_t errcode, int *faultptr)
{
struct iovec iov[2];
struct seg_desc seg_desc;
@@ -632,6 +631,8 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
uint32_t esp;
uint16_t stacksel;
+ *faultptr = 0;
+
cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
@@ -666,17 +667,19 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
&seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
sel_exception(ctx, vcpu, IDT_SS, stacksel, 1);
- return (1);
+ *faultptr = 1;
+ return (0);
}
if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
vm_inject_ac(ctx, vcpu, 1);
- return (1);
+ *faultptr = 1;
+ return (0);
}
error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE,
- iov, nitems(iov));
- if (error)
+ iov, nitems(iov), faultptr);
+ if (error || *faultptr)
return (error);
vm_copyout(ctx, vcpu, &errcode, iov, bytes);
@@ -687,16 +690,13 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
/*
* Evaluate return value from helper functions and potentially return to
* the VM run loop.
- * 0: success
- * +1: an exception was injected into the guest vcpu
- * -1: unrecoverable/programming error
*/
-#define CHKERR(x) \
+#define CHKERR(error,fault) \
do { \
- assert(((x) == 0) || ((x) == 1) || ((x) == -1)); \
- if ((x) == -1) \
+ assert((error == 0) || (error == EFAULT)); \
+ if (error) \
return (VMEXIT_ABORT); \
- else if ((x) == 1) \
+ else if (fault) \
return (VMEXIT_CONTINUE); \
} while (0)
@@ -711,7 +711,7 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
struct iovec nt_iov[2], ot_iov[2];
uint64_t cr0, ot_base;
uint32_t eip, ot_lim, access;
- int error, ext, minlimit, nt_type, ot_type, vcpu;
+ int error, ext, fault, minlimit, nt_type, ot_type, vcpu;
enum task_switch_reason reason;
uint16_t nt_sel, ot_sel;
@@ -739,8 +739,9 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
sup_paging.cpl = 0; /* implicit supervisor mode */
/* Fetch the new TSS descriptor */
- error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc);
- CHKERR(error);
+ error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc,
+ &fault);
+ CHKERR(error, fault);
nt = usd_to_seg_desc(&nt_desc);
@@ -792,8 +793,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
/* Fetch the new TSS */
error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1,
- PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov));
- CHKERR(error);
+ PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault);
+ CHKERR(error, fault);
vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1);
/* Get the old TSS selector from the guest's task register */
@@ -818,13 +819,14 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY);
/* Fetch the old TSS descriptor */
- error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc);
- CHKERR(error);
+ error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc,
+ &fault);
+ CHKERR(error, fault);
/* Get the old TSS */
error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1,
- PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov));
- CHKERR(error);
+ PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault);
+ CHKERR(error, fault);
vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1);
/*
@@ -834,8 +836,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
if (reason == TSR_IRET || reason == TSR_JMP) {
ot_desc.sd_type &= ~0x2;
error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel,
- &ot_desc);
- CHKERR(error);
+ &ot_desc, &fault);
+ CHKERR(error, fault);
}
if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) {
@@ -853,8 +855,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
if (reason != TSR_IRET) {
nt_desc.sd_type |= 0x2;
error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel,
- &nt_desc);
- CHKERR(error);
+ &nt_desc, &fault);
+ CHKERR(error, fault);
}
/* Update task register to point at the new TSS */
@@ -877,8 +879,9 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
assert(error == 0);
/* Load processor state from new TSS */
- error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov);
- CHKERR(error);
+ error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov,
+ &fault);
+ CHKERR(error, fault);
/*
* Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception
@@ -889,8 +892,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
assert(task_switch->ext);
assert(task_switch->reason == TSR_IDT_GATE);
error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type,
- task_switch->errcode);
- CHKERR(error);
+ task_switch->errcode, &fault);
+ CHKERR(error, fault);
}
/*
diff --git a/usr.sbin/bhyve/virtio.c b/usr.sbin/bhyve/virtio.c
index 41a9e42..11b1e62 100644
--- a/usr.sbin/bhyve/virtio.c
+++ b/usr.sbin/bhyve/virtio.c
@@ -316,7 +316,7 @@ vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
if ((vdir->vd_flags & VRING_DESC_F_INDIRECT) == 0) {
_vq_record(i, vdir, ctx, iov, n_iov, flags);
i++;
- } else if ((vs->vs_negotiated_caps &
+ } else if ((vs->vs_vc->vc_hv_caps &
VIRTIO_RING_F_INDIRECT_DESC) == 0) {
fprintf(stderr,
"%s: descriptor has forbidden INDIRECT flag, "
diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c
index 223ee25..7d3017f 100644
--- a/usr.sbin/bhyvectl/bhyvectl.c
+++ b/usr.sbin/bhyvectl/bhyvectl.c
@@ -293,6 +293,7 @@ static int get_guest_pat, get_host_pat;
static int get_guest_sysenter, get_vmcs_link;
static int get_exit_reason, get_vmcs_exit_qualification;
static int get_vmcs_exit_interruption_info, get_vmcs_exit_interruption_error;
+static int get_vmcs_exit_inst_length;
static uint64_t desc_base;
static uint32_t desc_limit, desc_access;
@@ -1145,7 +1146,15 @@ get_misc_vmcs(struct vmctx *ctx, int vcpu)
vcpu, u64);
}
}
-
+
+ if (!error && (get_vmcs_exit_inst_length || get_all)) {
+ error = vm_get_vmcs_field(ctx, vcpu,
+ VMCS_EXIT_INSTRUCTION_LENGTH, &u64);
+ if (error == 0)
+ printf("vmcs_exit_inst_length[%d]\t0x%08x\n", vcpu,
+ (uint32_t)u64);
+ }
+
if (!error && (get_vmcs_exit_qualification || get_all)) {
error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_QUALIFICATION,
&u64);
@@ -1405,6 +1414,8 @@ setup_options(bool cpu_intel)
REQ_ARG, 0, SET_VMCS_ENTRY_INTERRUPTION_INFO },
{ "get-vmcs-exit-qualification",
NO_ARG, &get_vmcs_exit_qualification, 1 },
+ { "get-vmcs-exit-inst-length",
+ NO_ARG, &get_vmcs_exit_inst_length, 1 },
{ "get-vmcs-interruptibility",
NO_ARG, &get_vmcs_interruptibility, 1 },
{ "get-vmcs-exit-interruption-error",
diff --git a/usr.sbin/bhyveload/bhyveload.c b/usr.sbin/bhyveload/bhyveload.c
index eaf71a8..8ebf116 100644
--- a/usr.sbin/bhyveload/bhyveload.c
+++ b/usr.sbin/bhyveload/bhyveload.c
@@ -609,7 +609,7 @@ disk_open(char *path)
{
int err, fd;
- if (ndisks > NDISKS)
+ if (ndisks >= NDISKS)
return (ERANGE);
err = 0;
OpenPOWER on IntegriCloud