diff options
author | bapt <bapt@FreeBSD.org> | 2015-05-03 19:30:11 +0000 |
---|---|---|
committer | bapt <bapt@FreeBSD.org> | 2015-05-03 19:30:11 +0000 |
commit | b5633ba2a5f5c5e3fddb07ef1f3c114268f7ea42 (patch) | |
tree | 43730ad7995cdbd70d25f4f9739ec3b2abd10b91 /usr.sbin/bhyve | |
parent | 249cdb0bbbb0d59f85a71e76323eeed99d6d3134 (diff) | |
parent | 4cd4238e928bc196c424f1549c026c4f4407fba6 (diff) | |
download | FreeBSD-src-b5633ba2a5f5c5e3fddb07ef1f3c114268f7ea42.zip FreeBSD-src-b5633ba2a5f5c5e3fddb07ef1f3c114268f7ea42.tar.gz |
Merge from head
Diffstat (limited to 'usr.sbin/bhyve')
-rw-r--r-- | usr.sbin/bhyve/Makefile | 2 | ||||
-rw-r--r-- | usr.sbin/bhyve/acpi.c | 2 | ||||
-rw-r--r-- | usr.sbin/bhyve/bhyverun.c | 11 | ||||
-rw-r--r-- | usr.sbin/bhyve/block_if.c | 108 | ||||
-rw-r--r-- | usr.sbin/bhyve/block_if.h | 1 | ||||
-rw-r--r-- | usr.sbin/bhyve/ioapic.c | 2 | ||||
-rw-r--r-- | usr.sbin/bhyve/ioapic.h | 2 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_ahci.c | 249 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_emul.c | 125 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_irq.c | 2 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_irq.h | 2 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_virtio_block.c | 5 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_virtio_net.c | 8 | ||||
-rw-r--r-- | usr.sbin/bhyve/pm.c | 2 |
14 files changed, 332 insertions, 189 deletions
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index 0273b0f..70b7041 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -44,7 +44,7 @@ SRCS= \ .PATH: ${.CURDIR}/../../sys/amd64/vmm SRCS+= vmm_instruction_emul.c -LIBADD= vmmapi md util pthread +LIBADD= vmmapi md pthread WARNS?= 2 diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c index a5a6559..a9dd1cc 100644 --- a/usr.sbin/bhyve/acpi.c +++ b/usr.sbin/bhyve/acpi.c @@ -386,7 +386,7 @@ basl_fwrite_fadt(FILE *fp) EFPRINTF(fp, "[0001]\t\tDuty Cycle Width : 00\n"); EFPRINTF(fp, "[0001]\t\tRTC Day Alarm Index : 00\n"); EFPRINTF(fp, "[0001]\t\tRTC Month Alarm Index : 00\n"); - EFPRINTF(fp, "[0001]\t\tRTC Century Index : 00\n"); + EFPRINTF(fp, "[0001]\t\tRTC Century Index : 32\n"); EFPRINTF(fp, "[0002]\t\tBoot Flags (decoded below) : 0000\n"); EFPRINTF(fp, "\t\t\tLegacy Devices Supported (V2) : 0\n"); EFPRINTF(fp, "\t\t\t8042 Present on ports 60/64 (V2) : 0\n"); diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index 271f67c..47a7699 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -325,8 +325,10 @@ vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) error = emulate_inout(ctx, vcpu, vme, strictio); if (error) { - fprintf(stderr, "Unhandled %s%c 0x%04x\n", in ? "in" : "out", - bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port); + fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", + in ? "in" : "out", + bytes == 1 ? 'b' : (bytes == 2 ? 
'w' : 'l'), + port, vmexit->rip); return (VMEXIT_ABORT); } else { return (VMEXIT_CONTINUE); @@ -803,6 +805,11 @@ main(int argc, char *argv[]) exit(1); } + if (guest_ncpus < 1) { + fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); + exit(1); + } + max_vcpus = num_vcpus_allowed(ctx); if (guest_ncpus > max_vcpus) { fprintf(stderr, "%d vCPUs requested but only %d available\n", diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c index b8b27a6..bcb1617 100644 --- a/usr.sbin/bhyve/block_if.c +++ b/usr.sbin/bhyve/block_if.c @@ -85,6 +85,7 @@ struct blockif_ctxt { int bc_magic; int bc_fd; int bc_ischr; + int bc_isgeom; int bc_candelete; int bc_rdonly; off_t bc_size; @@ -198,27 +199,93 @@ blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) } static void -blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) +blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) { struct blockif_req *br; off_t arg[2]; - int err; + ssize_t clen, len, off, boff, voff; + int i, err; br = be->be_req; + if (br->br_iovcnt <= 1) + buf = NULL; err = 0; - switch (be->be_op) { case BOP_READ: - if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, - br->br_offset) < 0) - err = errno; + if (buf == NULL) { + if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, + br->br_offset)) < 0) + err = errno; + else + br->br_resid -= len; + break; + } + i = 0; + off = voff = 0; + while (br->br_resid > 0) { + len = MIN(br->br_resid, MAXPHYS); + if (pread(bc->bc_fd, buf, len, br->br_offset + + off) < 0) { + err = errno; + break; + } + boff = 0; + do { + clen = MIN(len - boff, br->br_iov[i].iov_len - + voff); + memcpy(br->br_iov[i].iov_base + voff, + buf + boff, clen); + if (clen < br->br_iov[i].iov_len - voff) + voff += clen; + else { + i++; + voff = 0; + } + boff += clen; + } while (boff < len); + off += len; + br->br_resid -= len; + } break; case BOP_WRITE: - if (bc->bc_rdonly) + if (bc->bc_rdonly) { err = EROFS; - else if (pwritev(bc->bc_fd, 
br->br_iov, br->br_iovcnt, - br->br_offset) < 0) - err = errno; + break; + } + if (buf == NULL) { + if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, + br->br_offset)) < 0) + err = errno; + else + br->br_resid -= len; + break; + } + i = 0; + off = voff = 0; + while (br->br_resid > 0) { + len = MIN(br->br_resid, MAXPHYS); + boff = 0; + do { + clen = MIN(len - boff, br->br_iov[i].iov_len - + voff); + memcpy(buf + boff, + br->br_iov[i].iov_base + voff, clen); + if (clen < br->br_iov[i].iov_len - voff) + voff += clen; + else { + i++; + voff = 0; + } + boff += clen; + } while (boff < len); + if (pwrite(bc->bc_fd, buf, len, br->br_offset + + off) < 0) { + err = errno; + break; + } + off += len; + br->br_resid -= len; + } break; case BOP_FLUSH: if (bc->bc_ischr) { @@ -234,9 +301,11 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) err = EROFS; else if (bc->bc_ischr) { arg[0] = br->br_offset; - arg[1] = br->br_iov[0].iov_len; + arg[1] = br->br_resid; if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) err = errno; + else + br->br_resid = 0; } else err = EOPNOTSUPP; break; @@ -256,15 +325,20 @@ blockif_thr(void *arg) struct blockif_ctxt *bc; struct blockif_elem *be; pthread_t t; + uint8_t *buf; bc = arg; + if (bc->bc_isgeom) + buf = malloc(MAXPHYS); + else + buf = NULL; t = pthread_self(); pthread_mutex_lock(&bc->bc_mtx); for (;;) { while (blockif_dequeue(bc, t, &be)) { pthread_mutex_unlock(&bc->bc_mtx); - blockif_proc(bc, be); + blockif_proc(bc, be, buf); pthread_mutex_lock(&bc->bc_mtx); blockif_complete(bc, be); } @@ -275,6 +349,8 @@ blockif_thr(void *arg) } pthread_mutex_unlock(&bc->bc_mtx); + if (buf) + free(buf); pthread_exit(NULL); return (NULL); } @@ -315,13 +391,14 @@ struct blockif_ctxt * blockif_open(const char *optstr, const char *ident) { char tname[MAXCOMLEN + 1]; + char name[MAXPATHLEN]; char *nopt, *xopts; struct blockif_ctxt *bc; struct stat sbuf; struct diocgattr_arg arg; off_t size, psectsz, psectoff; int extra, fd, i, sectsz; - int nocache, 
sync, ro, candelete; + int nocache, sync, ro, candelete, geom; pthread_once(&blockif_once, blockif_init); @@ -375,7 +452,7 @@ blockif_open(const char *optstr, const char *ident) size = sbuf.st_size; sectsz = DEV_BSIZE; psectsz = psectoff = 0; - candelete = 0; + candelete = geom = 0; if (S_ISCHR(sbuf.st_mode)) { if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || ioctl(fd, DIOCGSECTORSIZE, &sectsz)) { @@ -391,6 +468,8 @@ blockif_open(const char *optstr, const char *ident) arg.len = sizeof(arg.value.i); if (ioctl(fd, DIOCGATTR, &arg) == 0) candelete = arg.value.i; + if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) + geom = 1; } else psectsz = sbuf.st_blksize; @@ -403,6 +482,7 @@ blockif_open(const char *optstr, const char *ident) bc->bc_magic = BLOCKIF_SIG; bc->bc_fd = fd; bc->bc_ischr = S_ISCHR(sbuf.st_mode); + bc->bc_isgeom = geom; bc->bc_candelete = candelete; bc->bc_rdonly = ro; bc->bc_size = size; diff --git a/usr.sbin/bhyve/block_if.h b/usr.sbin/bhyve/block_if.h index 1cb7c80..8e63407 100644 --- a/usr.sbin/bhyve/block_if.h +++ b/usr.sbin/bhyve/block_if.h @@ -45,6 +45,7 @@ struct blockif_req { struct iovec br_iov[BLOCKIF_IOV_MAX]; int br_iovcnt; off_t br_offset; + ssize_t br_resid; void (*br_callback)(struct blockif_req *req, int err); void *br_param; }; diff --git a/usr.sbin/bhyve/ioapic.c b/usr.sbin/bhyve/ioapic.c index 2950d9a..0ad69d9 100644 --- a/usr.sbin/bhyve/ioapic.c +++ b/usr.sbin/bhyve/ioapic.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014 Advanced Computing Technologies LLC + * Copyright (c) 2014 Hudson River Trading LLC * Written by: John H. Baldwin <jhb@FreeBSD.org> * All rights reserved. * diff --git a/usr.sbin/bhyve/ioapic.h b/usr.sbin/bhyve/ioapic.h index 3cfca4f..efdd3c6 100644 --- a/usr.sbin/bhyve/ioapic.h +++ b/usr.sbin/bhyve/ioapic.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014 Advanced Computing Technologies LLC + * Copyright (c) 2014 Hudson River Trading LLC * Written by: John H. Baldwin <jhb@FreeBSD.org> * All rights reserved.
* diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c index e8cb2da..2d05789 100644 --- a/usr.sbin/bhyve/pci_ahci.c +++ b/usr.sbin/bhyve/pci_ahci.c @@ -124,7 +124,7 @@ struct ahci_ioreq { uint32_t len; uint32_t done; int slot; - int prdtl; + int more; }; struct ahci_port { @@ -269,22 +269,24 @@ ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis) case FIS_TYPE_REGD2H: offset = 0x40; len = 20; - irq = AHCI_P_IX_DHR; + irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0; break; case FIS_TYPE_SETDEVBITS: offset = 0x58; len = 8; - irq = AHCI_P_IX_SDB; + irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0; break; case FIS_TYPE_PIOSETUP: offset = 0x20; len = 20; - irq = 0; + irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0; break; default: WPRINTF("unsupported fis type %d\n", ft); return; } + if (fis[2] & ATA_S_ERROR) + irq |= AHCI_P_IX_TFE; memcpy(p->rfis + offset, fis, len); if (irq) { p->is |= irq; @@ -309,22 +311,23 @@ ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd) uint8_t error; error = (tfd >> 8) & 0xff; + tfd &= 0x77; memset(fis, 0, sizeof(fis)); fis[0] = FIS_TYPE_SETDEVBITS; fis[1] = (1 << 6); - fis[2] = tfd & 0x77; + fis[2] = tfd; fis[3] = error; if (fis[2] & ATA_S_ERROR) { - p->is |= AHCI_P_IX_TFE; p->err_cfis[0] = slot; - p->err_cfis[2] = tfd & 0x77; + p->err_cfis[2] = tfd; p->err_cfis[3] = error; memcpy(&p->err_cfis[4], cfis + 4, 16); } else { *(uint32_t *)(fis + 4) = (1 << slot); p->sact &= ~(1 << slot); } - p->tfd = tfd; + p->tfd &= ~0x77; + p->tfd |= tfd; ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis); } @@ -351,7 +354,6 @@ ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd) fis[12] = cfis[12]; fis[13] = cfis[13]; if (fis[2] & ATA_S_ERROR) { - p->is |= AHCI_P_IX_TFE; p->err_cfis[0] = 0x80; p->err_cfis[2] = tfd & 0xff; p->err_cfis[3] = error; @@ -363,6 +365,21 @@ ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd) } static void 
+ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot) +{ + uint8_t fis[20]; + + p->tfd = ATA_S_READY | ATA_S_DSC; + memset(fis, 0, sizeof(fis)); + fis[0] = FIS_TYPE_REGD2H; + fis[1] = 0; /* No interrupt */ + fis[2] = p->tfd; /* Status */ + fis[3] = 0; /* No error */ + p->ci &= ~(1 << slot); + ahci_write_fis(p, FIS_TYPE_REGD2H, fis); +} + +static void ahci_write_reset_fis_d2h(struct ahci_port *p) { uint8_t fis[20]; @@ -418,7 +435,8 @@ ahci_port_stop(struct ahci_port *p) slot = aior->slot; cfis = aior->cfis; if (cfis[2] == ATA_WRITE_FPDMA_QUEUED || - cfis[2] == ATA_READ_FPDMA_QUEUED) + cfis[2] == ATA_READ_FPDMA_QUEUED || + cfis[2] == ATA_SEND_FPDMA_QUEUED) ncq = 1; if (ncq) @@ -489,6 +507,9 @@ ahci_reset(struct pci_ahci_softc *sc) for (i = 0; i < sc->ports; i++) { sc->port[i].ie = 0; sc->port[i].is = 0; + sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD); + if (sc->port[i].bctx) + sc->port[i].cmd |= AHCI_P_CMD_CPS; sc->port[i].sctl = 0; ahci_port_reset(&sc->port[i]); } @@ -520,26 +541,79 @@ atapi_string(uint8_t *dest, const char *src, int len) } } +/* + * Build up the iovec based on the PRDT, 'done' and 'len'. + */ +static void +ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior, + struct ahci_prdt_entry *prdt, uint16_t prdtl) +{ + struct blockif_req *breq = &aior->io_req; + int i, j, skip, todo, left, extra; + uint32_t dbcsz; + + /* Copy part of PRDT between 'done' and 'len' bytes into the iov. 
*/ + skip = aior->done; + left = aior->len - aior->done; + todo = 0; + for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0; + i++, prdt++) { + dbcsz = (prdt->dbc & DBCMASK) + 1; + /* Skip already done part of the PRDT */ + if (dbcsz <= skip) { + skip -= dbcsz; + continue; + } + dbcsz -= skip; + if (dbcsz > left) + dbcsz = left; + breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc), + prdt->dba + skip, dbcsz); + breq->br_iov[j].iov_len = dbcsz; + todo += dbcsz; + left -= dbcsz; + skip = 0; + j++; + } + + /* If we got limited by IOV length, round I/O down to sector size. */ + if (j == BLOCKIF_IOV_MAX) { + extra = todo % blockif_sectsz(p->bctx); + todo -= extra; + assert(todo > 0); + while (extra > 0) { + if (breq->br_iov[j - 1].iov_len > extra) { + breq->br_iov[j - 1].iov_len -= extra; + break; + } + extra -= breq->br_iov[j - 1].iov_len; + j--; + } + } + + breq->br_iovcnt = j; + breq->br_resid = todo; + aior->done += todo; + aior->more = (aior->done < aior->len && i < prdtl); +} + static void -ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done, - int seek) +ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done) { struct ahci_ioreq *aior; struct blockif_req *breq; - struct pci_ahci_softc *sc; struct ahci_prdt_entry *prdt; struct ahci_cmd_hdr *hdr; uint64_t lba; uint32_t len; - int i, err, iovcnt, ncq, readop; + int err, first, ncq, readop; - sc = p->pr_sc; prdt = (struct ahci_prdt_entry *)(cfis + 0x80); hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); ncq = 0; readop = 1; + first = (done == 0); - prdt += seek; if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 || cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 || cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 || @@ -580,57 +654,33 @@ ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done, lba *= blockif_sectsz(p->bctx); len *= blockif_sectsz(p->bctx); - /* - * Pull request off free list - */ + /* 
Pull request off free list */ aior = STAILQ_FIRST(&p->iofhd); assert(aior != NULL); STAILQ_REMOVE_HEAD(&p->iofhd, io_flist); + aior->cfis = cfis; aior->slot = slot; aior->len = len; aior->done = done; breq = &aior->io_req; breq->br_offset = lba + done; - iovcnt = hdr->prdtl - seek; - if (iovcnt > BLOCKIF_IOV_MAX) { - aior->prdtl = iovcnt - BLOCKIF_IOV_MAX; - iovcnt = BLOCKIF_IOV_MAX; - } else - aior->prdtl = 0; - breq->br_iovcnt = iovcnt; + ahci_build_iov(p, aior, prdt, hdr->prdtl); - /* - * Mark this command in-flight. - */ + /* Mark this command in-flight. */ p->pending |= 1 << slot; - /* - * Stuff request onto busy list - */ + /* Stuff request onto busy list. */ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); - /* - * Build up the iovec based on the prdt - */ - for (i = 0; i < iovcnt; i++) { - uint32_t dbcsz; + if (ncq && first) + ahci_write_fis_d2h_ncq(p, slot); - dbcsz = (prdt->dbc & DBCMASK) + 1; - breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc), - prdt->dba, dbcsz); - breq->br_iov[i].iov_len = dbcsz; - aior->done += dbcsz; - prdt++; - } if (readop) err = blockif_read(p->bctx, breq); else err = blockif_write(p->bctx, breq); assert(err == 0); - - if (ncq) - p->ci &= ~(1 << slot); } static void @@ -650,7 +700,7 @@ ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis) aior->slot = slot; aior->len = 0; aior->done = 0; - aior->prdtl = 0; + aior->more = 0; breq = &aior->io_req; /* @@ -703,15 +753,18 @@ ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done uint8_t *entry; uint64_t elba; uint32_t len, elen; - int err; + int err, first, ncq; uint8_t buf[512]; + first = (done == 0); if (cfis[2] == ATA_DATA_SET_MANAGEMENT) { len = (uint16_t)cfis[13] << 8 | cfis[12]; len *= 512; + ncq = 0; } else { /* ATA_SEND_FPDMA_QUEUED */ len = (uint16_t)cfis[11] << 8 | cfis[3]; len *= 512; + ncq = 1; } read_prdt(p, slot, cfis, buf, sizeof(buf)); @@ -745,12 +798,11 @@ next: aior->slot = slot; aior->len = len; aior->done = done; - 
aior->prdtl = 0; + aior->more = (len != done); breq = &aior->io_req; breq->br_offset = elba * blockif_sectsz(p->bctx); - breq->br_iovcnt = 1; - breq->br_iov[0].iov_len = elen * blockif_sectsz(p->bctx); + breq->br_resid = elen * blockif_sectsz(p->bctx); /* * Mark this command in-flight. @@ -762,6 +814,9 @@ next: */ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); + if (ncq && first) + ahci_write_fis_d2h_ncq(p, slot); + err = blockif_delete(p->bctx, breq); assert(err == 0); } @@ -903,7 +958,6 @@ handle_identify(struct ahci_port *p, int slot, uint8_t *cfis) buf[88] = 0x7f; if (p->xfermode & ATA_UDMA0) buf[88] |= (1 << ((p->xfermode & 7) + 8)); - buf[93] = (1 | 1 <<14); buf[100] = sectors; buf[101] = (sectors >> 16); buf[102] = (sectors >> 32); @@ -1242,8 +1296,7 @@ atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis) } static void -atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, - uint32_t done, int seek) +atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done) { struct ahci_ioreq *aior; struct ahci_cmd_hdr *hdr; @@ -1253,14 +1306,13 @@ atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint8_t *acmd; uint64_t lba; uint32_t len; - int i, err, iovcnt; + int err; sc = p->pr_sc; acmd = cfis + 0x40; hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); prdt = (struct ahci_prdt_entry *)(cfis + 0x80); - prdt += seek; lba = be32dec(acmd + 2); if (acmd[0] == READ_10) len = be16dec(acmd + 7); @@ -1285,37 +1337,14 @@ atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, aior->done = done; breq = &aior->io_req; breq->br_offset = lba + done; - iovcnt = hdr->prdtl - seek; - if (iovcnt > BLOCKIF_IOV_MAX) { - aior->prdtl = iovcnt - BLOCKIF_IOV_MAX; - iovcnt = BLOCKIF_IOV_MAX; - } else - aior->prdtl = 0; - breq->br_iovcnt = iovcnt; + ahci_build_iov(p, aior, prdt, hdr->prdtl); - /* - * Mark this command in-flight. - */ + /* Mark this command in-flight. 
*/ p->pending |= 1 << slot; - /* - * Stuff request onto busy list - */ + /* Stuff request onto busy list. */ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); - /* - * Build up the iovec based on the prdt - */ - for (i = 0; i < iovcnt; i++) { - uint32_t dbcsz; - - dbcsz = (prdt->dbc & DBCMASK) + 1; - breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc), - prdt->dba, dbcsz); - breq->br_iov[i].iov_len = dbcsz; - aior->done += dbcsz; - prdt++; - } err = blockif_read(p->bctx, breq); assert(err == 0); } @@ -1515,7 +1544,7 @@ handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis) break; case READ_10: case READ_12: - atapi_read(p, slot, cfis, 0, 0); + atapi_read(p, slot, cfis, 0); break; case REQUEST_SENSE: atapi_request_sense(p, slot, cfis); @@ -1543,6 +1572,7 @@ static void ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis) { + p->tfd |= ATA_S_BUSY; switch (cfis[2]) { case ATA_ATA_IDENTIFY: handle_identify(p, slot, cfis); @@ -1614,7 +1644,7 @@ ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis) case ATA_WRITE_DMA48: case ATA_READ_FPDMA_QUEUED: case ATA_WRITE_FPDMA_QUEUED: - ahci_handle_dma(p, slot, cfis, 0, 0); + ahci_handle_rw(p, slot, cfis, 0); break; case ATA_FLUSHCACHE: case ATA_FLUSHCACHE48: @@ -1755,7 +1785,7 @@ ata_ioreq_cb(struct blockif_req *br, int err) struct pci_ahci_softc *sc; uint32_t tfd; uint8_t *cfis; - int pending, slot, ncq, dsm; + int slot, ncq, dsm; DPRINTF("%s %d\n", __func__, err); @@ -1764,7 +1794,6 @@ ata_ioreq_cb(struct blockif_req *br, int err) p = aior->io_pr; cfis = aior->cfis; slot = aior->slot; - pending = aior->prdtl; sc = p->pr_sc; hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); @@ -1792,25 +1821,18 @@ ata_ioreq_cb(struct blockif_req *br, int err) if (!err) hdr->prdbc = aior->done; - if (dsm) { - if (aior->done != aior->len && !err) { + if (!err && aior->more) { + if (dsm) ahci_handle_dsm_trim(p, slot, cfis, aior->done); - goto out; - } - } else { - if (pending && !err) { - 
ahci_handle_dma(p, slot, cfis, aior->done, - hdr->prdtl - pending); - goto out; - } + else + ahci_handle_rw(p, slot, cfis, aior->done); + goto out; } - if (!err && aior->done == aior->len) { + if (!err) tfd = ATA_S_READY | ATA_S_DSC; - } else { + else tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR; - } - if (ncq) ahci_write_fis_sdb(p, slot, cfis, tfd); else @@ -1836,7 +1858,7 @@ atapi_ioreq_cb(struct blockif_req *br, int err) struct pci_ahci_softc *sc; uint8_t *cfis; uint32_t tfd; - int pending, slot; + int slot; DPRINTF("%s %d\n", __func__, err); @@ -1844,7 +1866,6 @@ atapi_ioreq_cb(struct blockif_req *br, int err) p = aior->io_pr; cfis = aior->cfis; slot = aior->slot; - pending = aior->prdtl; sc = p->pr_sc; hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE); @@ -1863,19 +1884,18 @@ atapi_ioreq_cb(struct blockif_req *br, int err) if (!err) hdr->prdbc = aior->done; - if (pending && !err) { - atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending); + if (!err && aior->more) { + atapi_read(p, slot, cfis, aior->done); goto out; } - if (!err && aior->done == aior->len) { + if (!err) { tfd = ATA_S_READY | ATA_S_DSC; } else { p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; p->asc = 0x21; tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; } - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; ahci_write_fis_d2h(p, slot, cfis, tfd); @@ -1949,8 +1969,15 @@ pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value) break; case AHCI_P_CMD: { - p->cmd = value; - + p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD | + AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE | + AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE | + AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK); + p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD | + AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE | + AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE | + AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value; + if (!(value & AHCI_P_CMD_ST)) { 
ahci_port_stop(p); } else { @@ -1978,6 +2005,10 @@ pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value) p->cmd &= ~AHCI_P_CMD_CLO; } + if (value & AHCI_P_CMD_ICC_MASK) { + p->cmd &= ~AHCI_P_CMD_ICC_MASK; + } + ahci_handle_port(p); break; } diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index 6b906ed..03ff0c0 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -59,17 +59,6 @@ __FBSDID("$FreeBSD$"); #define CONF1_ENABLE 0x80000000ul -#define CFGWRITE(pi,off,val,b) \ -do { \ - if ((b) == 1) { \ - pci_set_cfgdata8((pi),(off),(val)); \ - } else if ((b) == 2) { \ - pci_set_cfgdata16((pi),(off),(val)); \ - } else { \ - pci_set_cfgdata32((pi),(off),(val)); \ - } \ -} while (0) - #define MAXBUSES (PCI_BUSMAX + 1) #define MAXSLOTS (PCI_SLOTMAX + 1) #define MAXFUNCS (PCI_FUNCMAX + 1) @@ -124,6 +113,30 @@ static void pci_lintr_update(struct pci_devinst *pi); static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, int coff, int bytes, uint32_t *val); +static __inline void +CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes) +{ + + if (bytes == 1) + pci_set_cfgdata8(pi, coff, val); + else if (bytes == 2) + pci_set_cfgdata16(pi, coff, val); + else + pci_set_cfgdata32(pi, coff, val); +} + +static __inline uint32_t +CFGREAD(struct pci_devinst *pi, int coff, int bytes) +{ + + if (bytes == 1) + return (pci_get_cfgdata8(pi, coff)); + else if (bytes == 2) + return (pci_get_cfgdata16(pi, coff)); + else + return (pci_get_cfgdata32(pi, coff)); +} + /* * I/O access */ @@ -1653,27 +1666,31 @@ pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) } } -static uint32_t -bits_changed(uint32_t old, uint32_t new, uint32_t mask) -{ - - return ((old ^ new) & mask); -} - static void -pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes) +pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) { - int i; - uint16_t old; + int 
i, rshift; + uint32_t cmd, cmd2, changed, old, readonly; + + cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ /* - * The command register is at an offset of 4 bytes and thus the - * guest could write 1, 2 or 4 bytes starting at this offset. + * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3. + * + * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are + * 'write 1 to clear'. However these bits are not set to '1' by + * any device emulation so it is simpler to treat them as readonly. */ + rshift = (coff & 0x3) * 8; + readonly = 0xFFFFF880 >> rshift; - old = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ - CFGWRITE(pi, PCIR_COMMAND, new, bytes); /* update config */ - new = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */ + old = CFGREAD(pi, coff, bytes); + new &= ~readonly; + new |= (old & readonly); + CFGWRITE(pi, coff, new, bytes); /* update config */ + + cmd2 = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */ + changed = cmd ^ cmd2; /* * If the MMIO or I/O address space decoding has changed then @@ -1686,7 +1703,7 @@ pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes) break; case PCIBAR_IO: /* I/O address space decoding changed? */ - if (bits_changed(old, new, PCIM_CMD_PORTEN)) { + if (changed & PCIM_CMD_PORTEN) { if (porten(pi)) register_bar(pi, i); else @@ -1696,7 +1713,7 @@ pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes) case PCIBAR_MEM32: case PCIBAR_MEM64: /* MMIO address space decoding changed? 
*/ - if (bits_changed(old, new, PCIM_CMD_MEMEN)) { + if (changed & PCIM_CMD_MEMEN) { if (memen(pi)) register_bar(pi, i); else @@ -1776,14 +1793,8 @@ pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, needcfg = 1; } - if (needcfg) { - if (bytes == 1) - *eax = pci_get_cfgdata8(pi, coff); - else if (bytes == 2) - *eax = pci_get_cfgdata16(pi, coff); - else - *eax = pci_get_cfgdata32(pi, coff); - } + if (needcfg) + *eax = CFGREAD(pi, coff, bytes); pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); } else { @@ -1853,8 +1864,8 @@ pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, } else if (pci_emul_iscap(pi, coff)) { pci_emul_capwrite(pi, coff, bytes, *eax); - } else if (coff == PCIR_COMMAND) { - pci_emul_cmdwrite(pi, *eax, bytes); + } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { + pci_emul_cmdsts_write(pi, coff, *eax, bytes); } else { CFGWRITE(pi, coff, *eax, bytes); } @@ -1927,7 +1938,7 @@ INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); #define DMEMSZ 4096 struct pci_emul_dsoftc { uint8_t ioregs[DIOSZ]; - uint8_t memregs[DMEMSZ]; + uint8_t memregs[2][DMEMSZ]; }; #define PCI_EMUL_MSI_MSGS 4 @@ -1956,6 +1967,9 @@ pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); assert(error == 0); + error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); + assert(error == 0); + return (0); } @@ -1995,21 +2009,23 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, } } - if (baridx == 1) { + if (baridx == 1 || baridx == 2) { if (offset + size > DMEMSZ) { printf("diow: memw too large, offset %ld size %d\n", offset, size); return; } + i = baridx - 1; /* 'memregs' index */ + if (size == 1) { - sc->memregs[offset] = value; + sc->memregs[i][offset] = value; } else if (size == 2) { - *(uint16_t *)&sc->memregs[offset] = value; + *(uint16_t *)&sc->memregs[i][offset] = value; } else if (size == 4) { - 
*(uint32_t *)&sc->memregs[offset] = value; + *(uint32_t *)&sc->memregs[i][offset] = value; } else if (size == 8) { - *(uint64_t *)&sc->memregs[offset] = value; + *(uint64_t *)&sc->memregs[i][offset] = value; } else { printf("diow: memw unknown size %d\n", size); } @@ -2019,7 +2035,7 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, */ } - if (baridx > 1) { + if (baridx > 2) { printf("diow: unknown bar idx %d\n", baridx); } } @@ -2030,6 +2046,7 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, { struct pci_emul_dsoftc *sc = pi->pi_arg; uint32_t value; + int i; if (baridx == 0) { if (offset + size > DIOSZ) { @@ -2048,29 +2065,31 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, printf("dior: ior unknown size %d\n", size); } } - - if (baridx == 1) { + + if (baridx == 1 || baridx == 2) { if (offset + size > DMEMSZ) { printf("dior: memr too large, offset %ld size %d\n", offset, size); return (0); } - + + i = baridx - 1; /* 'memregs' index */ + if (size == 1) { - value = sc->memregs[offset]; + value = sc->memregs[i][offset]; } else if (size == 2) { - value = *(uint16_t *) &sc->memregs[offset]; + value = *(uint16_t *) &sc->memregs[i][offset]; } else if (size == 4) { - value = *(uint32_t *) &sc->memregs[offset]; + value = *(uint32_t *) &sc->memregs[i][offset]; } else if (size == 8) { - value = *(uint64_t *) &sc->memregs[offset]; + value = *(uint64_t *) &sc->memregs[i][offset]; } else { printf("dior: ior unknown size %d\n", size); } } - if (baridx > 1) { + if (baridx > 2) { printf("dior: unknown bar idx %d\n", baridx); return (0); } diff --git a/usr.sbin/bhyve/pci_irq.c b/usr.sbin/bhyve/pci_irq.c index 20e033f..f22b15c 100644 --- a/usr.sbin/bhyve/pci_irq.c +++ b/usr.sbin/bhyve/pci_irq.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014 Advanced Computing Technologies LLC + * Copyright (c) 2014 Hudson River Trading LLC * Written by: John H. 
Baldwin <jhb@FreeBSD.org> * All rights reserved. * diff --git a/usr.sbin/bhyve/pci_irq.h b/usr.sbin/bhyve/pci_irq.h index 9d331a5..24f9c99 100644 --- a/usr.sbin/bhyve/pci_irq.h +++ b/usr.sbin/bhyve/pci_irq.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014 Advanced Computing Technologies LLC + * Copyright (c) 2014 Hudson River Trading LLC * Written by: John H. Baldwin <jhb@FreeBSD.org> * All rights reserved. * diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c index 967b528..35daa1f 100644 --- a/usr.sbin/bhyve/pci_virtio_block.c +++ b/usr.sbin/bhyve/pci_virtio_block.c @@ -198,7 +198,7 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) struct pci_vtblk_ioreq *io; int i, n; int err; - int iolen; + ssize_t iolen; int writeop, type; off_t offset; struct iovec iov[BLOCKIF_IOV_MAX + 2]; @@ -246,8 +246,9 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop); iolen += iov[i].iov_len; } + io->io_req.br_resid = iolen; - DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", + DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld\n\r", writeop ? 
"write" : "read/ident", iolen, i - 1, offset)); switch (type) { diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c index 457a405..7227488 100644 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ b/usr.sbin/bhyve/pci_virtio_net.c @@ -393,6 +393,7 @@ pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) */ if (sc->vsc_rx_ready == 0) { sc->vsc_rx_ready = 1; + vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY; } } @@ -438,6 +439,7 @@ pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq) /* Signal the tx thread for processing */ pthread_mutex_lock(&sc->tx_mtx); + vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY; if (sc->tx_in_progress == 0) pthread_cond_signal(&sc->tx_cond); pthread_mutex_unlock(&sc->tx_mtx); @@ -466,6 +468,7 @@ pci_vtnet_tx_thread(void *param) for (;;) { /* note - tx mutex is locked here */ do { + vq->vq_used->vu_flags &= ~VRING_USED_F_NO_NOTIFY; if (sc->resetting) have_work = 0; else @@ -478,6 +481,7 @@ pci_vtnet_tx_thread(void *param) assert(error == 0); } } while (!have_work); + vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY; sc->tx_in_progress = 1; pthread_mutex_unlock(&sc->tx_mtx); @@ -640,8 +644,8 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); - /* link always up */ - sc->vsc_config.status = 1; + /* Link is up if we managed to open tap device. */ + sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0); /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) diff --git a/usr.sbin/bhyve/pm.c b/usr.sbin/bhyve/pm.c index f5a2d43..f7c1c23 100644 --- a/usr.sbin/bhyve/pm.c +++ b/usr.sbin/bhyve/pm.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2013 Advanced Computing Technologies LLC + * Copyright (c) 2013 Hudson River Trading LLC * Written by: John H. Baldwin <jhb@FreeBSD.org> * All rights reserved. * |