summaryrefslogtreecommitdiffstats
path: root/usr.sbin/bhyve
diff options
context:
space:
mode:
Diffstat (limited to 'usr.sbin/bhyve')
-rw-r--r--usr.sbin/bhyve/bhyve.821
-rw-r--r--usr.sbin/bhyve/bhyverun.c14
-rw-r--r--usr.sbin/bhyve/block_if.c74
-rw-r--r--usr.sbin/bhyve/inout.c8
-rw-r--r--usr.sbin/bhyve/pci_ahci.c13
-rw-r--r--usr.sbin/bhyve/pci_emul.c38
-rw-r--r--usr.sbin/bhyve/pci_hostbridge.c2
-rw-r--r--usr.sbin/bhyve/pci_virtio_block.c1
-rw-r--r--usr.sbin/bhyve/pci_virtio_net.c1
-rw-r--r--usr.sbin/bhyve/pci_virtio_rnd.c1
-rw-r--r--usr.sbin/bhyve/task_switch.c151
-rw-r--r--usr.sbin/bhyve/virtio.c2
12 files changed, 200 insertions, 126 deletions
diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8
index e15f9ac..6294e67 100644
--- a/usr.sbin/bhyve/bhyve.8
+++ b/usr.sbin/bhyve/bhyve.8
@@ -193,8 +193,13 @@ format.
.Pp
Block storage devices:
.Bl -tag -width 10n
-.It Pa /filename Ns Oo , Ns Li nocache Oc Ns Oo , Ns Li direct Oc Ns Oo , Ns Li ro Oc
-.It Pa /dev/xxx Ns Oo , Ns Ar nocache Oc Ns Oo , Ns Ar direct Oc Ns Oo , Ns Ar ro Oc
+.It Pa /filename Ns Oo , Ns Ar block-device-options Oc
+.It Pa /dev/xxx Ns Oo , Ns Ar block-device-options Oc
+.El
+.Pp
+The
+.Ar block-device-options
+are:
.Bl -tag -width 8n
.It Li nocache
Open the file with
@@ -204,14 +209,10 @@ Open the file using
.Dv O_SYNC .
.It Li ro
Force the file to be opened read-only.
-.El
-.Pp
-The
-.Li nocache ,
-.Li direct ,
-and
-.Li ro
-options are not available for virtio block devices.
+.It Li sectorsize= Ns Ar logical Ns Oo / Ns Ar physical Oc
+Specify the logical and physical sector sizes of the emulated disk.
+The physical sector size is optional and is equal to the logical sector size
+if not explicitly specified.
.El
.Pp
TTY devices:
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 47a7699..ee0f106 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -100,7 +100,7 @@ static struct vm_exit vmexit[VM_MAXCPU];
struct bhyvestats {
uint64_t vmexit_bogus;
- uint64_t vmexit_bogus_switch;
+ uint64_t vmexit_reqidle;
uint64_t vmexit_hlt;
uint64_t vmexit_pause;
uint64_t vmexit_mtrap;
@@ -461,6 +461,17 @@ vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
}
static int
+vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+
+ assert(vmexit->inst_length == 0);
+
+ stats.vmexit_reqidle++;
+
+ return (VMEXIT_CONTINUE);
+}
+
+static int
vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
@@ -571,6 +582,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_VMX] = vmexit_vmx,
[VM_EXITCODE_SVM] = vmexit_svm,
[VM_EXITCODE_BOGUS] = vmexit_bogus,
+ [VM_EXITCODE_REQIDLE] = vmexit_reqidle,
[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
[VM_EXITCODE_MTRAP] = vmexit_mtrap,
diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c
index bcb1617..ef8e11e 100644
--- a/usr.sbin/bhyve/block_if.c
+++ b/usr.sbin/bhyve/block_if.c
@@ -392,16 +392,18 @@ blockif_open(const char *optstr, const char *ident)
{
char tname[MAXCOMLEN + 1];
char name[MAXPATHLEN];
- char *nopt, *xopts;
+ char *nopt, *xopts, *cp;
struct blockif_ctxt *bc;
struct stat sbuf;
struct diocgattr_arg arg;
off_t size, psectsz, psectoff;
int extra, fd, i, sectsz;
- int nocache, sync, ro, candelete, geom;
+ int nocache, sync, ro, candelete, geom, ssopt, pssopt;
pthread_once(&blockif_once, blockif_init);
+ fd = -1;
+ ssopt = 0;
nocache = 0;
sync = 0;
ro = 0;
@@ -410,16 +412,25 @@ blockif_open(const char *optstr, const char *ident)
* The first element in the optstring is always a pathname.
* Optional elements follow
*/
- nopt = strdup(optstr);
- for (xopts = strtok(nopt, ",");
- xopts != NULL;
- xopts = strtok(NULL, ",")) {
- if (!strcmp(xopts, "nocache"))
+ nopt = xopts = strdup(optstr);
+ while (xopts != NULL) {
+ cp = strsep(&xopts, ",");
+ if (cp == nopt) /* file or device pathname */
+ continue;
+ else if (!strcmp(cp, "nocache"))
nocache = 1;
- else if (!strcmp(xopts, "sync"))
+ else if (!strcmp(cp, "sync") || !strcmp(cp, "direct"))
sync = 1;
- else if (!strcmp(xopts, "ro"))
+ else if (!strcmp(cp, "ro"))
ro = 1;
+ else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2)
+ ;
+ else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1)
+ pssopt = ssopt;
+ else {
+ fprintf(stderr, "Invalid device option \"%s\"\n", cp);
+ goto err;
+ }
}
extra = 0;
@@ -437,13 +448,12 @@ blockif_open(const char *optstr, const char *ident)
if (fd < 0) {
perror("Could not open backing file");
- return (NULL);
+ goto err;
}
if (fstat(fd, &sbuf) < 0) {
perror("Could not stat backing file");
- close(fd);
- return (NULL);
+ goto err;
}
/*
@@ -457,8 +467,7 @@ blockif_open(const char *optstr, const char *ident)
if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
perror("Could not fetch dev blk/sector size");
- close(fd);
- return (NULL);
+ goto err;
}
assert(size != 0);
assert(sectsz != 0);
@@ -473,10 +482,39 @@ blockif_open(const char *optstr, const char *ident)
} else
psectsz = sbuf.st_blksize;
+ if (ssopt != 0) {
+ if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 ||
+ ssopt > pssopt) {
+ fprintf(stderr, "Invalid sector size %d/%d\n",
+ ssopt, pssopt);
+ goto err;
+ }
+
+ /*
+ * Some backend drivers (e.g. cd0, ada0) require that the I/O
+ * size be a multiple of the device's sector size.
+ *
+ * Validate that the emulated sector size complies with this
+ * requirement.
+ */
+ if (S_ISCHR(sbuf.st_mode)) {
+ if (ssopt < sectsz || (ssopt % sectsz) != 0) {
+ fprintf(stderr, "Sector size %d incompatible "
+ "with underlying device sector size %d\n",
+ ssopt, sectsz);
+ goto err;
+ }
+ }
+
+ sectsz = ssopt;
+ psectsz = pssopt;
+ psectoff = 0;
+ }
+
bc = calloc(1, sizeof(struct blockif_ctxt));
if (bc == NULL) {
- close(fd);
- return (NULL);
+ perror("calloc");
+ goto err;
}
bc->bc_magic = BLOCKIF_SIG;
@@ -506,6 +544,10 @@ blockif_open(const char *optstr, const char *ident)
}
return (bc);
+err:
+ if (fd >= 0)
+ close(fd);
+ return (NULL);
}
static int
diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c
index 402b953..929bb3c 100644
--- a/usr.sbin/bhyve/inout.c
+++ b/usr.sbin/bhyve/inout.c
@@ -107,7 +107,7 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
uint32_t eax, val;
inout_func_t handler;
void *arg;
- int error, retval;
+ int error, fault, retval;
enum vm_reg_name idxreg;
uint64_t gla, index, iterations, count;
struct vm_inout_str *vis;
@@ -163,11 +163,11 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
}
error = vm_copy_setup(ctx, vcpu, &vis->paging, gla,
- bytes, prot, iov, nitems(iov));
- if (error == -1) {
+ bytes, prot, iov, nitems(iov), &fault);
+ if (error) {
retval = -1; /* Unrecoverable error */
break;
- } else if (error == 1) {
+ } else if (fault) {
retval = 0; /* Resume guest to handle fault */
break;
}
diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c
index 31e02f8..35a0859 100644
--- a/usr.sbin/bhyve/pci_ahci.c
+++ b/usr.sbin/bhyve/pci_ahci.c
@@ -2093,7 +2093,7 @@ pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
struct pci_ahci_softc *sc = pi->pi_arg;
assert(baridx == 5);
- assert(size == 4);
+ assert((offset % 4) == 0 && size == 4);
pthread_mutex_lock(&sc->mtx);
@@ -2182,24 +2182,29 @@ pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
static uint64_t
pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
- uint64_t offset, int size)
+ uint64_t regoff, int size)
{
struct pci_ahci_softc *sc = pi->pi_arg;
+ uint64_t offset;
uint32_t value;
assert(baridx == 5);
- assert(size == 4);
+ assert(size == 1 || size == 2 || size == 4);
+ assert((regoff & (size - 1)) == 0);
pthread_mutex_lock(&sc->mtx);
+ offset = regoff & ~0x3; /* round down to a multiple of 4 bytes */
if (offset < AHCI_OFFSET)
value = pci_ahci_host_read(sc, offset);
else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
value = pci_ahci_port_read(sc, offset);
else {
value = 0;
- WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
+ WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n",
+ regoff);
}
+ value >>= 8 * (regoff & 0x3);
pthread_mutex_unlock(&sc->mtx);
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
index a08ac4c..03ff0c0 100644
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -1938,7 +1938,7 @@ INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
#define DMEMSZ 4096
struct pci_emul_dsoftc {
uint8_t ioregs[DIOSZ];
- uint8_t memregs[DMEMSZ];
+ uint8_t memregs[2][DMEMSZ];
};
#define PCI_EMUL_MSI_MSGS 4
@@ -1967,6 +1967,9 @@ pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ);
assert(error == 0);
+ error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ);
+ assert(error == 0);
+
return (0);
}
@@ -2006,21 +2009,23 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
}
}
- if (baridx == 1) {
+ if (baridx == 1 || baridx == 2) {
if (offset + size > DMEMSZ) {
printf("diow: memw too large, offset %ld size %d\n",
offset, size);
return;
}
+ i = baridx - 1; /* 'memregs' index */
+
if (size == 1) {
- sc->memregs[offset] = value;
+ sc->memregs[i][offset] = value;
} else if (size == 2) {
- *(uint16_t *)&sc->memregs[offset] = value;
+ *(uint16_t *)&sc->memregs[i][offset] = value;
} else if (size == 4) {
- *(uint32_t *)&sc->memregs[offset] = value;
+ *(uint32_t *)&sc->memregs[i][offset] = value;
} else if (size == 8) {
- *(uint64_t *)&sc->memregs[offset] = value;
+ *(uint64_t *)&sc->memregs[i][offset] = value;
} else {
printf("diow: memw unknown size %d\n", size);
}
@@ -2030,7 +2035,7 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
*/
}
- if (baridx > 1) {
+ if (baridx > 2) {
printf("diow: unknown bar idx %d\n", baridx);
}
}
@@ -2041,6 +2046,7 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
{
struct pci_emul_dsoftc *sc = pi->pi_arg;
uint32_t value;
+ int i;
if (baridx == 0) {
if (offset + size > DIOSZ) {
@@ -2059,29 +2065,31 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
printf("dior: ior unknown size %d\n", size);
}
}
-
- if (baridx == 1) {
+
+ if (baridx == 1 || baridx == 2) {
if (offset + size > DMEMSZ) {
printf("dior: memr too large, offset %ld size %d\n",
offset, size);
return (0);
}
-
+
+ i = baridx - 1; /* 'memregs' index */
+
if (size == 1) {
- value = sc->memregs[offset];
+ value = sc->memregs[i][offset];
} else if (size == 2) {
- value = *(uint16_t *) &sc->memregs[offset];
+ value = *(uint16_t *) &sc->memregs[i][offset];
} else if (size == 4) {
- value = *(uint32_t *) &sc->memregs[offset];
+ value = *(uint32_t *) &sc->memregs[i][offset];
} else if (size == 8) {
- value = *(uint64_t *) &sc->memregs[offset];
+ value = *(uint64_t *) &sc->memregs[i][offset];
} else {
printf("dior: ior unknown size %d\n", size);
}
}
- if (baridx > 1) {
+ if (baridx > 2) {
printf("dior: unknown bar idx %d\n", baridx);
return (0);
}
diff --git a/usr.sbin/bhyve/pci_hostbridge.c b/usr.sbin/bhyve/pci_hostbridge.c
index 54a25ae..5c9ea28 100644
--- a/usr.sbin/bhyve/pci_hostbridge.c
+++ b/usr.sbin/bhyve/pci_hostbridge.c
@@ -38,7 +38,7 @@ pci_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
/* config space */
pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1275); /* NetApp */
pci_set_cfgdata16(pi, PCIR_DEVICE, 0x1275); /* NetApp */
- pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_BRIDGE);
+ pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL);
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_HOST);
diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c
index 35daa1f..8500be6 100644
--- a/usr.sbin/bhyve/pci_virtio_block.c
+++ b/usr.sbin/bhyve/pci_virtio_block.c
@@ -370,6 +370,7 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
+ pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
blockif_close(sc->bc);
diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c
index 1a029d5..3781ea9 100644
--- a/usr.sbin/bhyve/pci_virtio_net.c
+++ b/usr.sbin/bhyve/pci_virtio_net.c
@@ -640,6 +640,7 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
+ pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
/* Link is up if we managed to open tap device. */
sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0);
diff --git a/usr.sbin/bhyve/pci_virtio_rnd.c b/usr.sbin/bhyve/pci_virtio_rnd.c
index 92d1d6f..78448f5 100644
--- a/usr.sbin/bhyve/pci_virtio_rnd.c
+++ b/usr.sbin/bhyve/pci_virtio_rnd.c
@@ -170,6 +170,7 @@ pci_vtrnd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_CRYPTO);
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_ENTROPY);
+ pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
if (vi_intr_init(&sc->vrsc_vs, 1, fbsdrun_virtio_msix()))
return (1);
diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c
index ba6a9d2..69dfaae 100644
--- a/usr.sbin/bhyve/task_switch.c
+++ b/usr.sbin/bhyve/task_switch.c
@@ -202,7 +202,8 @@ desc_table_limit_check(struct vmctx *ctx, int vcpu, uint16_t sel)
*/
static int
desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
- uint16_t sel, struct user_segment_descriptor *desc, bool doread)
+ uint16_t sel, struct user_segment_descriptor *desc, bool doread,
+ int *faultptr)
{
struct iovec iov[2];
uint64_t base;
@@ -215,28 +216,30 @@ desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
assert(limit >= SEL_LIMIT(sel));
error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel),
- sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov));
- if (error == 0) {
- if (doread)
- vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc));
- else
- vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc));
- }
- return (error);
+ sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov),
+ faultptr);
+ if (error || *faultptr)
+ return (error);
+
+ if (doread)
+ vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc));
+ else
+ vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc));
+ return (0);
}
static int
desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
- uint16_t sel, struct user_segment_descriptor *desc)
+ uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
- return (desc_table_rw(ctx, vcpu, paging, sel, desc, true));
+ return (desc_table_rw(ctx, vcpu, paging, sel, desc, true, faultptr));
}
static int
desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
- uint16_t sel, struct user_segment_descriptor *desc)
+ uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
- return (desc_table_rw(ctx, vcpu, paging, sel, desc, false));
+ return (desc_table_rw(ctx, vcpu, paging, sel, desc, false, faultptr));
}
/*
@@ -248,7 +251,7 @@ desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
*/
static int
read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
- uint16_t sel, struct user_segment_descriptor *desc)
+ uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
struct vm_guest_paging sup_paging;
int error;
@@ -267,7 +270,7 @@ read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
sup_paging = ts->paging;
sup_paging.cpl = 0; /* implicit supervisor mode */
- error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc);
+ error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc, faultptr);
return (error);
}
@@ -301,14 +304,10 @@ ldt_desc(int sd_type)
/*
* Validate the descriptor 'seg_desc' associated with 'segment'.
- *
- * Returns 0 on success.
- * Returns 1 if an exception was injected into the guest.
- * Returns -1 otherwise.
*/
static int
validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
- int segment, struct seg_desc *seg_desc)
+ int segment, struct seg_desc *seg_desc, int *faultptr)
{
struct vm_guest_paging sup_paging;
struct user_segment_descriptor usd;
@@ -369,8 +368,8 @@ validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
/* Read the descriptor from the GDT/LDT */
sup_paging = ts->paging;
sup_paging.cpl = 0; /* implicit supervisor mode */
- error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd);
- if (error)
+ error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd, faultptr);
+ if (error || *faultptr)
return (error);
/* Verify that the descriptor type is compatible with the segment */
@@ -476,14 +475,10 @@ update_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *sd)
/*
* Update the vcpu registers to reflect the state of the new task.
- *
- * Returns 0 on success.
- * Returns 1 if an exception was injected into the guest.
- * Returns -1 otherwise.
*/
static int
tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
- uint16_t ot_sel, struct tss32 *tss, struct iovec *iov)
+ uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr)
{
struct seg_desc seg_desc, seg_desc2;
uint64_t *pdpte, maxphyaddr, reserved;
@@ -565,8 +560,9 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss));
/* Validate segment descriptors */
- error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc);
- if (error)
+ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc,
+ faultptr);
+ if (error || *faultptr)
return (error);
update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc);
@@ -579,33 +575,40 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
* VM-entry checks so the guest can handle any exception injected
* during task switch emulation.
*/
- error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc);
- if (error)
+ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc,
+ faultptr);
+ if (error || *faultptr)
return (error);
- error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2);
- if (error)
+
+ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2,
+ faultptr);
+ if (error || *faultptr)
return (error);
update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc);
update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2);
ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK;
- error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc);
- if (error)
+ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc,
+ faultptr);
+ if (error || *faultptr)
return (error);
update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc);
- error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc);
- if (error)
+ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc,
+ faultptr);
+ if (error || *faultptr)
return (error);
update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc);
- error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc);
- if (error)
+ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc,
+ faultptr);
+ if (error || *faultptr)
return (error);
update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc);
- error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc);
- if (error)
+ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc,
+ faultptr);
+ if (error || *faultptr)
return (error);
update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc);
@@ -616,14 +619,10 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
* Push an error code on the stack of the new task. This is needed if the
* task switch was triggered by a hardware exception that causes an error
* code to be saved (e.g. #PF).
- *
- * Returns 0 on success.
- * Returns 1 if an exception was injected into the guest.
- * Returns -1 otherwise.
*/
static int
push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
- int task_type, uint32_t errcode)
+ int task_type, uint32_t errcode, int *faultptr)
{
struct iovec iov[2];
struct seg_desc seg_desc;
@@ -632,6 +631,8 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
uint32_t esp;
uint16_t stacksel;
+ *faultptr = 0;
+
cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
@@ -666,17 +667,19 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
&seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
sel_exception(ctx, vcpu, IDT_SS, stacksel, 1);
- return (1);
+ *faultptr = 1;
+ return (0);
}
if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
vm_inject_ac(ctx, vcpu, 1);
- return (1);
+ *faultptr = 1;
+ return (0);
}
error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE,
- iov, nitems(iov));
- if (error)
+ iov, nitems(iov), faultptr);
+ if (error || *faultptr)
return (error);
vm_copyout(ctx, vcpu, &errcode, iov, bytes);
@@ -687,16 +690,13 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
/*
* Evaluate return value from helper functions and potentially return to
* the VM run loop.
- * 0: success
- * +1: an exception was injected into the guest vcpu
- * -1: unrecoverable/programming error
*/
-#define CHKERR(x) \
+#define CHKERR(error,fault) \
do { \
- assert(((x) == 0) || ((x) == 1) || ((x) == -1)); \
- if ((x) == -1) \
+ assert((error == 0) || (error == EFAULT)); \
+ if (error) \
return (VMEXIT_ABORT); \
- else if ((x) == 1) \
+ else if (fault) \
return (VMEXIT_CONTINUE); \
} while (0)
@@ -711,7 +711,7 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
struct iovec nt_iov[2], ot_iov[2];
uint64_t cr0, ot_base;
uint32_t eip, ot_lim, access;
- int error, ext, minlimit, nt_type, ot_type, vcpu;
+ int error, ext, fault, minlimit, nt_type, ot_type, vcpu;
enum task_switch_reason reason;
uint16_t nt_sel, ot_sel;
@@ -739,8 +739,9 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
sup_paging.cpl = 0; /* implicit supervisor mode */
/* Fetch the new TSS descriptor */
- error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc);
- CHKERR(error);
+ error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc,
+ &fault);
+ CHKERR(error, fault);
nt = usd_to_seg_desc(&nt_desc);
@@ -792,8 +793,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
/* Fetch the new TSS */
error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1,
- PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov));
- CHKERR(error);
+ PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault);
+ CHKERR(error, fault);
vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1);
/* Get the old TSS selector from the guest's task register */
@@ -818,13 +819,14 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY);
/* Fetch the old TSS descriptor */
- error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc);
- CHKERR(error);
+ error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc,
+ &fault);
+ CHKERR(error, fault);
/* Get the old TSS */
error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1,
- PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov));
- CHKERR(error);
+ PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault);
+ CHKERR(error, fault);
vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1);
/*
@@ -834,8 +836,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
if (reason == TSR_IRET || reason == TSR_JMP) {
ot_desc.sd_type &= ~0x2;
error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel,
- &ot_desc);
- CHKERR(error);
+ &ot_desc, &fault);
+ CHKERR(error, fault);
}
if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) {
@@ -853,8 +855,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
if (reason != TSR_IRET) {
nt_desc.sd_type |= 0x2;
error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel,
- &nt_desc);
- CHKERR(error);
+ &nt_desc, &fault);
+ CHKERR(error, fault);
}
/* Update task register to point at the new TSS */
@@ -877,8 +879,9 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
assert(error == 0);
/* Load processor state from new TSS */
- error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov);
- CHKERR(error);
+ error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov,
+ &fault);
+ CHKERR(error, fault);
/*
* Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception
@@ -889,8 +892,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
assert(task_switch->ext);
assert(task_switch->reason == TSR_IDT_GATE);
error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type,
- task_switch->errcode);
- CHKERR(error);
+ task_switch->errcode, &fault);
+ CHKERR(error, fault);
}
/*
diff --git a/usr.sbin/bhyve/virtio.c b/usr.sbin/bhyve/virtio.c
index 41a9e42..11b1e62 100644
--- a/usr.sbin/bhyve/virtio.c
+++ b/usr.sbin/bhyve/virtio.c
@@ -316,7 +316,7 @@ vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
if ((vdir->vd_flags & VRING_DESC_F_INDIRECT) == 0) {
_vq_record(i, vdir, ctx, iov, n_iov, flags);
i++;
- } else if ((vs->vs_negotiated_caps &
+ } else if ((vs->vs_vc->vc_hv_caps &
VIRTIO_RING_F_INDIRECT_DESC) == 0) {
fprintf(stderr,
"%s: descriptor has forbidden INDIRECT flag, "
OpenPOWER on IntegriCloud