diff options
Diffstat (limited to 'usr.sbin/bhyve')
-rw-r--r-- | usr.sbin/bhyve/bhyve.8 | 21 | ||||
-rw-r--r-- | usr.sbin/bhyve/bhyverun.c | 14 | ||||
-rw-r--r-- | usr.sbin/bhyve/block_if.c | 74 | ||||
-rw-r--r-- | usr.sbin/bhyve/inout.c | 8 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_ahci.c | 13 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_emul.c | 38 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_hostbridge.c | 2 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_virtio_block.c | 1 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_virtio_net.c | 1 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_virtio_rnd.c | 1 | ||||
-rw-r--r-- | usr.sbin/bhyve/task_switch.c | 151 | ||||
-rw-r--r-- | usr.sbin/bhyve/virtio.c | 2 |
12 files changed, 200 insertions, 126 deletions
diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8 index e15f9ac..6294e67 100644 --- a/usr.sbin/bhyve/bhyve.8 +++ b/usr.sbin/bhyve/bhyve.8 @@ -193,8 +193,13 @@ format. .Pp Block storage devices: .Bl -tag -width 10n -.It Pa /filename Ns Oo , Ns Li nocache Oc Ns Oo , Ns Li direct Oc Ns Oo , Ns Li ro Oc -.It Pa /dev/xxx Ns Oo , Ns Ar nocache Oc Ns Oo , Ns Ar direct Oc Ns Oo , Ns Ar ro Oc +.It Pa /filename Ns Oo , Ns Ar block-device-options Oc +.It Pa /dev/xxx Ns Oo , Ns Ar block-device-options Oc +.El +.Pp +The +.Ar block-device-options +are: .Bl -tag -width 8n .It Li nocache Open the file with @@ -204,14 +209,10 @@ Open the file using .Dv O_SYNC . .It Li ro Force the file to be opened read-only. -.El -.Pp -The -.Li nocache , -.Li direct , -and -.Li ro -options are not available for virtio block devices. +.It Li sectorsize= Ns Ar logical Ns Oo / Ns Ar physical Oc +Specify the logical and physical sector sizes of the emulated disk. +The physical sector size is optional and is equal to the logical sector size +if not explicitly specified. .El .Pp TTY devices: diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index 47a7699..ee0f106 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -100,7 +100,7 @@ static struct vm_exit vmexit[VM_MAXCPU]; struct bhyvestats { uint64_t vmexit_bogus; - uint64_t vmexit_bogus_switch; + uint64_t vmexit_reqidle; uint64_t vmexit_hlt; uint64_t vmexit_pause; uint64_t vmexit_mtrap; @@ -461,6 +461,17 @@ vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) } static int +vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + + assert(vmexit->inst_length == 0); + + stats.vmexit_reqidle++; + + return (VMEXIT_CONTINUE); +} + +static int vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { @@ -571,6 +582,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_VMX] = vmexit_vmx, [VM_EXITCODE_SVM] = vmexit_svm, [VM_EXITCODE_BOGUS] = vmexit_bogus, + [VM_EXITCODE_REQIDLE] = vmexit_reqidle, [VM_EXITCODE_RDMSR] = vmexit_rdmsr, [VM_EXITCODE_WRMSR] = vmexit_wrmsr, [VM_EXITCODE_MTRAP] = vmexit_mtrap, diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c index bcb1617..ef8e11e 100644 --- a/usr.sbin/bhyve/block_if.c +++ b/usr.sbin/bhyve/block_if.c @@ -392,16 +392,18 @@ blockif_open(const char *optstr, const char *ident) { char tname[MAXCOMLEN + 1]; char name[MAXPATHLEN]; - char *nopt, *xopts; + char *nopt, *xopts, *cp; struct blockif_ctxt *bc; struct stat sbuf; struct diocgattr_arg arg; off_t size, psectsz, psectoff; int extra, fd, i, sectsz; - int nocache, sync, ro, candelete, geom; + int nocache, sync, ro, candelete, geom, ssopt, pssopt; pthread_once(&blockif_once, blockif_init); + fd = -1; + ssopt = 0; nocache = 0; sync = 0; ro = 0; @@ -410,16 +412,25 @@ blockif_open(const char *optstr, const char *ident) * The first element in the optstring is always a pathname. * Optional elements follow */ - nopt = strdup(optstr); - for (xopts = strtok(nopt, ","); - xopts != NULL; - xopts = strtok(NULL, ",")) { - if (!strcmp(xopts, "nocache")) + nopt = xopts = strdup(optstr); + while (xopts != NULL) { + cp = strsep(&xopts, ","); + if (cp == nopt) /* file or device pathname */ + continue; + else if (!strcmp(cp, "nocache")) nocache = 1; - else if (!strcmp(xopts, "sync")) + else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) sync = 1; - else if (!strcmp(xopts, "ro")) + else if (!strcmp(cp, "ro")) ro = 1; + else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) + ; + else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) + pssopt = ssopt; + else { + fprintf(stderr, "Invalid device option \"%s\"\n", cp); + goto err; + } } extra = 0; @@ -437,13 +448,12 @@ blockif_open(const char *optstr, const char *ident) if (fd < 0) { perror("Could not open backing file"); - return (NULL); + goto err; } if (fstat(fd, &sbuf) < 0) { perror("Could not stat backing file"); - close(fd); - return (NULL); + goto err; } /* @@ -457,8 +467,7 @@ blockif_open(const char *optstr, const char *ident) if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || ioctl(fd, DIOCGSECTORSIZE, §sz)) { perror("Could not fetch dev blk/sector size"); - close(fd); - return (NULL); + goto err; } assert(size != 0); assert(sectsz != 0); @@ -473,10 +482,39 @@ blockif_open(const char *optstr, const char *ident) } else psectsz = sbuf.st_blksize; + if (ssopt != 0) { + if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || + ssopt > pssopt) { + fprintf(stderr, "Invalid sector size %d/%d\n", + ssopt, pssopt); + goto err; + } + + /* + * Some backend drivers (e.g. cd0, ada0) require that the I/O + * size be a multiple of the device's sector size. + * + * Validate that the emulated sector size complies with this + * requirement. + */ + if (S_ISCHR(sbuf.st_mode)) { + if (ssopt < sectsz || (ssopt % sectsz) != 0) { + fprintf(stderr, "Sector size %d incompatible " + "with underlying device sector size %d\n", + ssopt, sectsz); + goto err; + } + } + + sectsz = ssopt; + psectsz = pssopt; + psectoff = 0; + } + bc = calloc(1, sizeof(struct blockif_ctxt)); if (bc == NULL) { - close(fd); - return (NULL); + perror("calloc"); + goto err; } bc->bc_magic = BLOCKIF_SIG; @@ -506,6 +544,10 @@ blockif_open(const char *optstr, const char *ident) } return (bc); +err: + if (fd >= 0) + close(fd); + return (NULL); } static int diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c index 402b953..929bb3c 100644 --- a/usr.sbin/bhyve/inout.c +++ b/usr.sbin/bhyve/inout.c @@ -107,7 +107,7 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) uint32_t eax, val; inout_func_t handler; void *arg; - int error, retval; + int error, fault, retval; enum vm_reg_name idxreg; uint64_t gla, index, iterations, count; struct vm_inout_str *vis; @@ -163,11 +163,11 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) } error = vm_copy_setup(ctx, vcpu, &vis->paging, gla, - bytes, prot, iov, nitems(iov)); - if (error == -1) { + bytes, prot, iov, nitems(iov), &fault); + if (error) { retval = -1; /* Unrecoverable error */ break; - } else if (error == 1) { + } else if (fault) { retval = 0; /* Resume guest to handle fault */ break; } diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c index 31e02f8..35a0859 100644 --- a/usr.sbin/bhyve/pci_ahci.c +++ b/usr.sbin/bhyve/pci_ahci.c @@ -2093,7 +2093,7 @@ pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, struct pci_ahci_softc *sc = pi->pi_arg; assert(baridx == 5); - assert(size == 4); + assert((offset % 4) == 0 && size == 4); pthread_mutex_lock(&sc->mtx); @@ -2182,24 +2182,29 @@ pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset) static uint64_t pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, - uint64_t offset, int size) + uint64_t regoff, int size) { struct pci_ahci_softc *sc = pi->pi_arg; + uint64_t offset; uint32_t value; assert(baridx == 5); - assert(size == 4); + assert(size == 1 || size == 2 || size == 4); + assert((regoff & (size - 1)) == 0); pthread_mutex_lock(&sc->mtx); + offset = regoff & ~0x3; /* round down to a multiple of 4 bytes */ if (offset < AHCI_OFFSET) value = pci_ahci_host_read(sc, offset); else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP) value = pci_ahci_port_read(sc, offset); else { value = 0; - WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset); + WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", + regoff); } + value >>= 8 * (regoff & 0x3); pthread_mutex_unlock(&sc->mtx); diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index a08ac4c..03ff0c0 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -1938,7 +1938,7 @@ INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); #define DMEMSZ 4096 struct pci_emul_dsoftc { uint8_t ioregs[DIOSZ]; - uint8_t memregs[DMEMSZ]; + uint8_t memregs[2][DMEMSZ]; }; #define PCI_EMUL_MSI_MSGS 4 @@ -1967,6 +1967,9 @@ pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); assert(error == 0); + error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); + assert(error == 0); + return (0); } @@ -2006,21 +2009,23 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, } } - if (baridx == 1) { + if (baridx == 1 || baridx == 2) { if (offset + size > DMEMSZ) { printf("diow: memw too large, offset %ld size %d\n", offset, size); return; } + i = baridx - 1; /* 'memregs' index */ + if (size == 1) { - sc->memregs[offset] = value; + sc->memregs[i][offset] = value; } else if (size == 2) { - *(uint16_t *)&sc->memregs[offset] = value; + *(uint16_t *)&sc->memregs[i][offset] = value; } else if (size == 4) { - *(uint32_t *)&sc->memregs[offset] = value; + *(uint32_t *)&sc->memregs[i][offset] = value; } else if (size == 8) { - *(uint64_t *)&sc->memregs[offset] = value; + *(uint64_t *)&sc->memregs[i][offset] = value; } else { printf("diow: memw unknown size %d\n", size); } @@ -2030,7 +2035,7 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, */ } - if (baridx > 1) { + if (baridx > 2) { printf("diow: unknown bar idx %d\n", baridx); } } @@ -2041,6 +2046,7 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, { struct pci_emul_dsoftc *sc = pi->pi_arg; uint32_t value; + int i; if (baridx == 0) { if (offset + size > DIOSZ) { @@ -2059,29 +2065,31 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, printf("dior: ior unknown size %d\n", size); } } - - if (baridx == 1) { + + if (baridx == 1 || baridx == 2) { if (offset + size > DMEMSZ) { printf("dior: memr too large, offset %ld size %d\n", offset, size); return (0); } - + + i = baridx - 1; /* 'memregs' index */ + if (size == 1) { - value = sc->memregs[offset]; + value = sc->memregs[i][offset]; } else if (size == 2) { - value = *(uint16_t *) &sc->memregs[offset]; + value = *(uint16_t *) &sc->memregs[i][offset]; } else if (size == 4) { - value = *(uint32_t *) &sc->memregs[offset]; + value = *(uint32_t *) &sc->memregs[i][offset]; } else if (size == 8) { - value = *(uint64_t *) &sc->memregs[offset]; + value = *(uint64_t *) &sc->memregs[i][offset]; } else { printf("dior: ior unknown size %d\n", size); } } - if (baridx > 1) { + if (baridx > 2) { printf("dior: unknown bar idx %d\n", baridx); return (0); } diff --git a/usr.sbin/bhyve/pci_hostbridge.c b/usr.sbin/bhyve/pci_hostbridge.c index 54a25ae..5c9ea28 100644 --- a/usr.sbin/bhyve/pci_hostbridge.c +++ b/usr.sbin/bhyve/pci_hostbridge.c @@ -38,7 +38,7 @@ pci_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) /* config space */ pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1275); /* NetApp */ pci_set_cfgdata16(pi, PCIR_DEVICE, 0x1275); /* NetApp */ - pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_BRIDGE); + pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_HOST); diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c index 35daa1f..8500be6 100644 --- a/usr.sbin/bhyve/pci_virtio_block.c +++ b/usr.sbin/bhyve/pci_virtio_block.c @@ -370,6 +370,7 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK); + pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) { blockif_close(sc->bc); diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c index 1a029d5..3781ea9 100644 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ b/usr.sbin/bhyve/pci_virtio_net.c @@ -640,6 +640,7 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); + pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); /* Link is up if we managed to open tap device. */ sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0); diff --git a/usr.sbin/bhyve/pci_virtio_rnd.c b/usr.sbin/bhyve/pci_virtio_rnd.c index 92d1d6f..78448f5 100644 --- a/usr.sbin/bhyve/pci_virtio_rnd.c +++ b/usr.sbin/bhyve/pci_virtio_rnd.c @@ -170,6 +170,7 @@ pci_vtrnd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_CRYPTO); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_ENTROPY); + pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); if (vi_intr_init(&sc->vrsc_vs, 1, fbsdrun_virtio_msix())) return (1); diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c index ba6a9d2..69dfaae 100644 --- a/usr.sbin/bhyve/task_switch.c +++ b/usr.sbin/bhyve/task_switch.c @@ -202,7 +202,8 @@ desc_table_limit_check(struct vmctx *ctx, int vcpu, uint16_t sel) */ static int desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - uint16_t sel, struct user_segment_descriptor *desc, bool doread) + uint16_t sel, struct user_segment_descriptor *desc, bool doread, + int *faultptr) { struct iovec iov[2]; uint64_t base; @@ -215,28 +216,30 @@ desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, assert(limit >= SEL_LIMIT(sel)); error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel), - sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov)); - if (error == 0) { - if (doread) - vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc)); - else - vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc)); - } - return (error); + sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov), + faultptr); + if (error || *faultptr) + return (error); + + if (doread) + vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc)); + else + vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc)); + return (0); } static int desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - uint16_t sel, struct user_segment_descriptor *desc) + uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) { - return (desc_table_rw(ctx, vcpu, paging, sel, desc, true)); + return (desc_table_rw(ctx, vcpu, paging, sel, desc, true, faultptr)); } static int desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - uint16_t sel, struct user_segment_descriptor *desc) + uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) { - return (desc_table_rw(ctx, vcpu, paging, sel, desc, false)); + return (desc_table_rw(ctx, vcpu, paging, sel, desc, false, faultptr)); } /* @@ -248,7 +251,7 @@ desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, */ static int read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, - uint16_t sel, struct user_segment_descriptor *desc) + uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) { struct vm_guest_paging sup_paging; int error; @@ -267,7 +270,7 @@ read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, sup_paging = ts->paging; sup_paging.cpl = 0; /* implicit supervisor mode */ - error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc); + error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc, faultptr); return (error); } @@ -301,14 +304,10 @@ ldt_desc(int sd_type) /* * Validate the descriptor 'seg_desc' associated with 'segment'. - * - * Returns 0 on success. - * Returns 1 if an exception was injected into the guest. - * Returns -1 otherwise. */ static int validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, - int segment, struct seg_desc *seg_desc) + int segment, struct seg_desc *seg_desc, int *faultptr) { struct vm_guest_paging sup_paging; struct user_segment_descriptor usd; @@ -369,8 +368,8 @@ validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, /* Read the descriptor from the GDT/LDT */ sup_paging = ts->paging; sup_paging.cpl = 0; /* implicit supervisor mode */ - error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd); - if (error) + error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd, faultptr); + if (error || *faultptr) return (error); /* Verify that the descriptor type is compatible with the segment */ @@ -476,14 +475,10 @@ update_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *sd) /* * Update the vcpu registers to reflect the state of the new task. - * - * Returns 0 on success. - * Returns 1 if an exception was injected into the guest. - * Returns -1 otherwise. */ static int tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, - uint16_t ot_sel, struct tss32 *tss, struct iovec *iov) + uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr) { struct seg_desc seg_desc, seg_desc2; uint64_t *pdpte, maxphyaddr, reserved; @@ -565,8 +560,9 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss)); /* Validate segment descriptors */ - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc); - if (error) + error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc, + faultptr); + if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc); @@ -579,33 +575,40 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, * VM-entry checks so the guest can handle any exception injected * during task switch emulation. */ - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc); - if (error) + error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc, + faultptr); + if (error || *faultptr) return (error); - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2); - if (error) + + error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2, + faultptr); + if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc); update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2); ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK; - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc); - if (error) + error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc, + faultptr); + if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc); - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc); - if (error) + error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc, + faultptr); + if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc); - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc); - if (error) + error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc, + faultptr); + if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc); - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc); - if (error) + error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc, + faultptr); + if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc); @@ -616,14 +619,10 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, * Push an error code on the stack of the new task. This is needed if the * task switch was triggered by a hardware exception that causes an error * code to be saved (e.g. #PF). - * - * Returns 0 on success. - * Returns 1 if an exception was injected into the guest. - * Returns -1 otherwise. */ static int push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - int task_type, uint32_t errcode) + int task_type, uint32_t errcode, int *faultptr) { struct iovec iov[2]; struct seg_desc seg_desc; @@ -632,6 +631,8 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, uint32_t esp; uint16_t stacksel; + *faultptr = 0; + cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0); rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS); stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS); @@ -666,17 +667,19 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) { sel_exception(ctx, vcpu, IDT_SS, stacksel, 1); - return (1); + *faultptr = 1; + return (0); } if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) { vm_inject_ac(ctx, vcpu, 1); - return (1); + *faultptr = 1; + return (0); } error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE, - iov, nitems(iov)); - if (error) + iov, nitems(iov), faultptr); + if (error || *faultptr) return (error); vm_copyout(ctx, vcpu, &errcode, iov, bytes); @@ -687,16 +690,13 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, /* * Evaluate return value from helper functions and potentially return to * the VM run loop. - * 0: success - * +1: an exception was injected into the guest vcpu - * -1: unrecoverable/programming error */ -#define CHKERR(x) \ +#define CHKERR(error,fault) \ do { \ - assert(((x) == 0) || ((x) == 1) || ((x) == -1)); \ - if ((x) == -1) \ + assert((error == 0) || (error == EFAULT)); \ + if (error) \ return (VMEXIT_ABORT); \ - else if ((x) == 1) \ + else if (fault) \ return (VMEXIT_CONTINUE); \ } while (0) @@ -711,7 +711,7 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) struct iovec nt_iov[2], ot_iov[2]; uint64_t cr0, ot_base; uint32_t eip, ot_lim, access; - int error, ext, minlimit, nt_type, ot_type, vcpu; + int error, ext, fault, minlimit, nt_type, ot_type, vcpu; enum task_switch_reason reason; uint16_t nt_sel, ot_sel; @@ -739,8 +739,9 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) sup_paging.cpl = 0; /* implicit supervisor mode */ /* Fetch the new TSS descriptor */ - error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc); - CHKERR(error); + error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc, + &fault); + CHKERR(error, fault); nt = usd_to_seg_desc(&nt_desc); @@ -792,8 +793,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) /* Fetch the new TSS */ error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1, - PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov)); - CHKERR(error); + PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault); + CHKERR(error, fault); vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1); /* Get the old TSS selector from the guest's task register */ @@ -818,13 +819,14 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY); /* Fetch the old TSS descriptor */ - error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc); - CHKERR(error); + error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc, + &fault); + CHKERR(error, fault); /* Get the old TSS */ error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1, - PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov)); - CHKERR(error); + PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault); + CHKERR(error, fault); vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1); /* @@ -834,8 +836,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) if (reason == TSR_IRET || reason == TSR_JMP) { ot_desc.sd_type &= ~0x2; error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel, - &ot_desc); - CHKERR(error); + &ot_desc, &fault); + CHKERR(error, fault); } if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) { @@ -853,8 +855,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) if (reason != TSR_IRET) { nt_desc.sd_type |= 0x2; error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel, - &nt_desc); - CHKERR(error); + &nt_desc, &fault); + CHKERR(error, fault); } /* Update task register to point at the new TSS */ @@ -877,8 +879,9 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) assert(error == 0); /* Load processor state from new TSS */ - error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov); - CHKERR(error); + error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov, + &fault); + CHKERR(error, fault); /* * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception @@ -889,8 +892,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) assert(task_switch->ext); assert(task_switch->reason == TSR_IDT_GATE); error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type, - task_switch->errcode); - CHKERR(error); + task_switch->errcode, &fault); + CHKERR(error, fault); } /* diff --git a/usr.sbin/bhyve/virtio.c b/usr.sbin/bhyve/virtio.c index 41a9e42..11b1e62 100644 --- a/usr.sbin/bhyve/virtio.c +++ b/usr.sbin/bhyve/virtio.c @@ -316,7 +316,7 @@ vq_getchain(struct vqueue_info *vq, uint16_t *pidx, if ((vdir->vd_flags & VRING_DESC_F_INDIRECT) == 0) { _vq_record(i, vdir, ctx, iov, n_iov, flags); i++; - } else if ((vs->vs_negotiated_caps & + } else if ((vs->vs_vc->vc_hv_caps & VIRTIO_RING_F_INDIRECT_DESC) == 0) { fprintf(stderr, "%s: descriptor has forbidden INDIRECT flag, " |