summaryrefslogtreecommitdiffstats
path: root/usr.sbin/bhyve
diff options
context:
space:
mode:
authorbapt <bapt@FreeBSD.org>2015-05-03 19:30:11 +0000
committerbapt <bapt@FreeBSD.org>2015-05-03 19:30:11 +0000
commitb5633ba2a5f5c5e3fddb07ef1f3c114268f7ea42 (patch)
tree43730ad7995cdbd70d25f4f9739ec3b2abd10b91 /usr.sbin/bhyve
parent249cdb0bbbb0d59f85a71e76323eeed99d6d3134 (diff)
parent4cd4238e928bc196c424f1549c026c4f4407fba6 (diff)
downloadFreeBSD-src-b5633ba2a5f5c5e3fddb07ef1f3c114268f7ea42.zip
FreeBSD-src-b5633ba2a5f5c5e3fddb07ef1f3c114268f7ea42.tar.gz
Merge from head
Diffstat (limited to 'usr.sbin/bhyve')
-rw-r--r--usr.sbin/bhyve/Makefile2
-rw-r--r--usr.sbin/bhyve/acpi.c2
-rw-r--r--usr.sbin/bhyve/bhyverun.c11
-rw-r--r--usr.sbin/bhyve/block_if.c108
-rw-r--r--usr.sbin/bhyve/block_if.h1
-rw-r--r--usr.sbin/bhyve/ioapic.c2
-rw-r--r--usr.sbin/bhyve/ioapic.h2
-rw-r--r--usr.sbin/bhyve/pci_ahci.c249
-rw-r--r--usr.sbin/bhyve/pci_emul.c125
-rw-r--r--usr.sbin/bhyve/pci_irq.c2
-rw-r--r--usr.sbin/bhyve/pci_irq.h2
-rw-r--r--usr.sbin/bhyve/pci_virtio_block.c5
-rw-r--r--usr.sbin/bhyve/pci_virtio_net.c8
-rw-r--r--usr.sbin/bhyve/pm.c2
14 files changed, 332 insertions, 189 deletions
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
index 0273b0f..70b7041 100644
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -44,7 +44,7 @@ SRCS= \
.PATH: ${.CURDIR}/../../sys/amd64/vmm
SRCS+= vmm_instruction_emul.c
-LIBADD= vmmapi md util pthread
+LIBADD= vmmapi md pthread
WARNS?= 2
diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c
index a5a6559..a9dd1cc 100644
--- a/usr.sbin/bhyve/acpi.c
+++ b/usr.sbin/bhyve/acpi.c
@@ -386,7 +386,7 @@ basl_fwrite_fadt(FILE *fp)
EFPRINTF(fp, "[0001]\t\tDuty Cycle Width : 00\n");
EFPRINTF(fp, "[0001]\t\tRTC Day Alarm Index : 00\n");
EFPRINTF(fp, "[0001]\t\tRTC Month Alarm Index : 00\n");
- EFPRINTF(fp, "[0001]\t\tRTC Century Index : 00\n");
+ EFPRINTF(fp, "[0001]\t\tRTC Century Index : 32\n");
EFPRINTF(fp, "[0002]\t\tBoot Flags (decoded below) : 0000\n");
EFPRINTF(fp, "\t\t\tLegacy Devices Supported (V2) : 0\n");
EFPRINTF(fp, "\t\t\t8042 Present on ports 60/64 (V2) : 0\n");
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 271f67c..47a7699 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -325,8 +325,10 @@ vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
error = emulate_inout(ctx, vcpu, vme, strictio);
if (error) {
- fprintf(stderr, "Unhandled %s%c 0x%04x\n", in ? "in" : "out",
- bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
+ fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
+ in ? "in" : "out",
+ bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
+ port, vmexit->rip);
return (VMEXIT_ABORT);
} else {
return (VMEXIT_CONTINUE);
@@ -803,6 +805,11 @@ main(int argc, char *argv[])
exit(1);
}
+ if (guest_ncpus < 1) {
+ fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
+ exit(1);
+ }
+
max_vcpus = num_vcpus_allowed(ctx);
if (guest_ncpus > max_vcpus) {
fprintf(stderr, "%d vCPUs requested but only %d available\n",
diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c
index b8b27a6..bcb1617 100644
--- a/usr.sbin/bhyve/block_if.c
+++ b/usr.sbin/bhyve/block_if.c
@@ -85,6 +85,7 @@ struct blockif_ctxt {
int bc_magic;
int bc_fd;
int bc_ischr;
+ int bc_isgeom;
int bc_candelete;
int bc_rdonly;
off_t bc_size;
@@ -198,27 +199,93 @@ blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
}
static void
-blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
+blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
{
struct blockif_req *br;
off_t arg[2];
- int err;
+ ssize_t clen, len, off, boff, voff;
+ int i, err;
br = be->be_req;
+ if (br->br_iovcnt <= 1)
+ buf = NULL;
err = 0;
-
switch (be->be_op) {
case BOP_READ:
- if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
- br->br_offset) < 0)
- err = errno;
+ if (buf == NULL) {
+ if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
+ br->br_offset)) < 0)
+ err = errno;
+ else
+ br->br_resid -= len;
+ break;
+ }
+ i = 0;
+ off = voff = 0;
+ while (br->br_resid > 0) {
+ len = MIN(br->br_resid, MAXPHYS);
+ if (pread(bc->bc_fd, buf, len, br->br_offset +
+ off) < 0) {
+ err = errno;
+ break;
+ }
+ boff = 0;
+ do {
+ clen = MIN(len - boff, br->br_iov[i].iov_len -
+ voff);
+ memcpy(br->br_iov[i].iov_base + voff,
+ buf + boff, clen);
+ if (clen < br->br_iov[i].iov_len - voff)
+ voff += clen;
+ else {
+ i++;
+ voff = 0;
+ }
+ boff += clen;
+ } while (boff < len);
+ off += len;
+ br->br_resid -= len;
+ }
break;
case BOP_WRITE:
- if (bc->bc_rdonly)
+ if (bc->bc_rdonly) {
err = EROFS;
- else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
- br->br_offset) < 0)
- err = errno;
+ break;
+ }
+ if (buf == NULL) {
+ if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
+ br->br_offset)) < 0)
+ err = errno;
+ else
+ br->br_resid -= len;
+ break;
+ }
+ i = 0;
+ off = voff = 0;
+ while (br->br_resid > 0) {
+ len = MIN(br->br_resid, MAXPHYS);
+ boff = 0;
+ do {
+ clen = MIN(len - boff, br->br_iov[i].iov_len -
+ voff);
+ memcpy(buf + boff,
+ br->br_iov[i].iov_base + voff, clen);
+ if (clen < br->br_iov[i].iov_len - voff)
+ voff += clen;
+ else {
+ i++;
+ voff = 0;
+ }
+ boff += clen;
+ } while (boff < len);
+ if (pwrite(bc->bc_fd, buf, len, br->br_offset +
+ off) < 0) {
+ err = errno;
+ break;
+ }
+ off += len;
+ br->br_resid -= len;
+ }
break;
case BOP_FLUSH:
if (bc->bc_ischr) {
@@ -234,9 +301,11 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
err = EROFS;
else if (bc->bc_ischr) {
arg[0] = br->br_offset;
- arg[1] = br->br_iov[0].iov_len;
+ arg[1] = br->br_resid;
if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
err = errno;
+ else
+ br->br_resid = 0;
} else
err = EOPNOTSUPP;
break;
@@ -256,15 +325,20 @@ blockif_thr(void *arg)
struct blockif_ctxt *bc;
struct blockif_elem *be;
pthread_t t;
+ uint8_t *buf;
bc = arg;
+ if (bc->bc_isgeom)
+ buf = malloc(MAXPHYS);
+ else
+ buf = NULL;
t = pthread_self();
pthread_mutex_lock(&bc->bc_mtx);
for (;;) {
while (blockif_dequeue(bc, t, &be)) {
pthread_mutex_unlock(&bc->bc_mtx);
- blockif_proc(bc, be);
+ blockif_proc(bc, be, buf);
pthread_mutex_lock(&bc->bc_mtx);
blockif_complete(bc, be);
}
@@ -275,6 +349,8 @@ blockif_thr(void *arg)
}
pthread_mutex_unlock(&bc->bc_mtx);
+ if (buf)
+ free(buf);
pthread_exit(NULL);
return (NULL);
}
@@ -315,13 +391,14 @@ struct blockif_ctxt *
blockif_open(const char *optstr, const char *ident)
{
char tname[MAXCOMLEN + 1];
+ char name[MAXPATHLEN];
char *nopt, *xopts;
struct blockif_ctxt *bc;
struct stat sbuf;
struct diocgattr_arg arg;
off_t size, psectsz, psectoff;
int extra, fd, i, sectsz;
- int nocache, sync, ro, candelete;
+ int nocache, sync, ro, candelete, geom;
pthread_once(&blockif_once, blockif_init);
@@ -375,7 +452,7 @@ blockif_open(const char *optstr, const char *ident)
size = sbuf.st_size;
sectsz = DEV_BSIZE;
psectsz = psectoff = 0;
- candelete = 0;
+ candelete = geom = 0;
if (S_ISCHR(sbuf.st_mode)) {
if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
@@ -391,6 +468,8 @@ blockif_open(const char *optstr, const char *ident)
arg.len = sizeof(arg.value.i);
if (ioctl(fd, DIOCGATTR, &arg) == 0)
candelete = arg.value.i;
+ if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0)
+ geom = 1;
} else
psectsz = sbuf.st_blksize;
@@ -403,6 +482,7 @@ blockif_open(const char *optstr, const char *ident)
bc->bc_magic = BLOCKIF_SIG;
bc->bc_fd = fd;
bc->bc_ischr = S_ISCHR(sbuf.st_mode);
+ bc->bc_isgeom = geom;
bc->bc_candelete = candelete;
bc->bc_rdonly = ro;
bc->bc_size = size;
diff --git a/usr.sbin/bhyve/block_if.h b/usr.sbin/bhyve/block_if.h
index 1cb7c80..8e63407 100644
--- a/usr.sbin/bhyve/block_if.h
+++ b/usr.sbin/bhyve/block_if.h
@@ -45,6 +45,7 @@ struct blockif_req {
struct iovec br_iov[BLOCKIF_IOV_MAX];
int br_iovcnt;
off_t br_offset;
+ ssize_t br_resid;
void (*br_callback)(struct blockif_req *req, int err);
void *br_param;
};
diff --git a/usr.sbin/bhyve/ioapic.c b/usr.sbin/bhyve/ioapic.c
index 2950d9a..0ad69d9 100644
--- a/usr.sbin/bhyve/ioapic.c
+++ b/usr.sbin/bhyve/ioapic.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014 Advanced Computing Technologies LLC
+ * Copyright (c) 2014 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/usr.sbin/bhyve/ioapic.h b/usr.sbin/bhyve/ioapic.h
index 3cfca4f..efdd3c6 100644
--- a/usr.sbin/bhyve/ioapic.h
+++ b/usr.sbin/bhyve/ioapic.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014 Advanced Computing Technologies LLC
+ * Copyright (c) 2014 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c
index e8cb2da..2d05789 100644
--- a/usr.sbin/bhyve/pci_ahci.c
+++ b/usr.sbin/bhyve/pci_ahci.c
@@ -124,7 +124,7 @@ struct ahci_ioreq {
uint32_t len;
uint32_t done;
int slot;
- int prdtl;
+ int more;
};
struct ahci_port {
@@ -269,22 +269,24 @@ ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
case FIS_TYPE_REGD2H:
offset = 0x40;
len = 20;
- irq = AHCI_P_IX_DHR;
+ irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
break;
case FIS_TYPE_SETDEVBITS:
offset = 0x58;
len = 8;
- irq = AHCI_P_IX_SDB;
+ irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
break;
case FIS_TYPE_PIOSETUP:
offset = 0x20;
len = 20;
- irq = 0;
+ irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
break;
default:
WPRINTF("unsupported fis type %d\n", ft);
return;
}
+ if (fis[2] & ATA_S_ERROR)
+ irq |= AHCI_P_IX_TFE;
memcpy(p->rfis + offset, fis, len);
if (irq) {
p->is |= irq;
@@ -309,22 +311,23 @@ ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
uint8_t error;
error = (tfd >> 8) & 0xff;
+ tfd &= 0x77;
memset(fis, 0, sizeof(fis));
fis[0] = FIS_TYPE_SETDEVBITS;
fis[1] = (1 << 6);
- fis[2] = tfd & 0x77;
+ fis[2] = tfd;
fis[3] = error;
if (fis[2] & ATA_S_ERROR) {
- p->is |= AHCI_P_IX_TFE;
p->err_cfis[0] = slot;
- p->err_cfis[2] = tfd & 0x77;
+ p->err_cfis[2] = tfd;
p->err_cfis[3] = error;
memcpy(&p->err_cfis[4], cfis + 4, 16);
} else {
*(uint32_t *)(fis + 4) = (1 << slot);
p->sact &= ~(1 << slot);
}
- p->tfd = tfd;
+ p->tfd &= ~0x77;
+ p->tfd |= tfd;
ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
}
@@ -351,7 +354,6 @@ ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
fis[12] = cfis[12];
fis[13] = cfis[13];
if (fis[2] & ATA_S_ERROR) {
- p->is |= AHCI_P_IX_TFE;
p->err_cfis[0] = 0x80;
p->err_cfis[2] = tfd & 0xff;
p->err_cfis[3] = error;
@@ -363,6 +365,21 @@ ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
}
static void
+ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
+{
+ uint8_t fis[20];
+
+ p->tfd = ATA_S_READY | ATA_S_DSC;
+ memset(fis, 0, sizeof(fis));
+ fis[0] = FIS_TYPE_REGD2H;
+ fis[1] = 0; /* No interrupt */
+ fis[2] = p->tfd; /* Status */
+ fis[3] = 0; /* No error */
+ p->ci &= ~(1 << slot);
+ ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
+}
+
+static void
ahci_write_reset_fis_d2h(struct ahci_port *p)
{
uint8_t fis[20];
@@ -418,7 +435,8 @@ ahci_port_stop(struct ahci_port *p)
slot = aior->slot;
cfis = aior->cfis;
if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
- cfis[2] == ATA_READ_FPDMA_QUEUED)
+ cfis[2] == ATA_READ_FPDMA_QUEUED ||
+ cfis[2] == ATA_SEND_FPDMA_QUEUED)
ncq = 1;
if (ncq)
@@ -489,6 +507,9 @@ ahci_reset(struct pci_ahci_softc *sc)
for (i = 0; i < sc->ports; i++) {
sc->port[i].ie = 0;
sc->port[i].is = 0;
+ sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
+ if (sc->port[i].bctx)
+ sc->port[i].cmd |= AHCI_P_CMD_CPS;
sc->port[i].sctl = 0;
ahci_port_reset(&sc->port[i]);
}
@@ -520,26 +541,79 @@ atapi_string(uint8_t *dest, const char *src, int len)
}
}
+/*
+ * Build up the iovec based on the PRDT, 'done' and 'len'.
+ */
+static void
+ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
+ struct ahci_prdt_entry *prdt, uint16_t prdtl)
+{
+ struct blockif_req *breq = &aior->io_req;
+ int i, j, skip, todo, left, extra;
+ uint32_t dbcsz;
+
+ /* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
+ skip = aior->done;
+ left = aior->len - aior->done;
+ todo = 0;
+ for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
+ i++, prdt++) {
+ dbcsz = (prdt->dbc & DBCMASK) + 1;
+ /* Skip already done part of the PRDT */
+ if (dbcsz <= skip) {
+ skip -= dbcsz;
+ continue;
+ }
+ dbcsz -= skip;
+ if (dbcsz > left)
+ dbcsz = left;
+ breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
+ prdt->dba + skip, dbcsz);
+ breq->br_iov[j].iov_len = dbcsz;
+ todo += dbcsz;
+ left -= dbcsz;
+ skip = 0;
+ j++;
+ }
+
+ /* If we got limited by IOV length, round I/O down to sector size. */
+ if (j == BLOCKIF_IOV_MAX) {
+ extra = todo % blockif_sectsz(p->bctx);
+ todo -= extra;
+ assert(todo > 0);
+ while (extra > 0) {
+ if (breq->br_iov[j - 1].iov_len > extra) {
+ breq->br_iov[j - 1].iov_len -= extra;
+ break;
+ }
+ extra -= breq->br_iov[j - 1].iov_len;
+ j--;
+ }
+ }
+
+ breq->br_iovcnt = j;
+ breq->br_resid = todo;
+ aior->done += todo;
+ aior->more = (aior->done < aior->len && i < prdtl);
+}
+
static void
-ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
- int seek)
+ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
{
struct ahci_ioreq *aior;
struct blockif_req *breq;
- struct pci_ahci_softc *sc;
struct ahci_prdt_entry *prdt;
struct ahci_cmd_hdr *hdr;
uint64_t lba;
uint32_t len;
- int i, err, iovcnt, ncq, readop;
+ int err, first, ncq, readop;
- sc = p->pr_sc;
prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
ncq = 0;
readop = 1;
+ first = (done == 0);
- prdt += seek;
if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
@@ -580,57 +654,33 @@ ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
lba *= blockif_sectsz(p->bctx);
len *= blockif_sectsz(p->bctx);
- /*
- * Pull request off free list
- */
+ /* Pull request off free list */
aior = STAILQ_FIRST(&p->iofhd);
assert(aior != NULL);
STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
+
aior->cfis = cfis;
aior->slot = slot;
aior->len = len;
aior->done = done;
breq = &aior->io_req;
breq->br_offset = lba + done;
- iovcnt = hdr->prdtl - seek;
- if (iovcnt > BLOCKIF_IOV_MAX) {
- aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
- iovcnt = BLOCKIF_IOV_MAX;
- } else
- aior->prdtl = 0;
- breq->br_iovcnt = iovcnt;
+ ahci_build_iov(p, aior, prdt, hdr->prdtl);
- /*
- * Mark this command in-flight.
- */
+ /* Mark this command in-flight. */
p->pending |= 1 << slot;
- /*
- * Stuff request onto busy list
- */
+ /* Stuff request onto busy list. */
TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
- /*
- * Build up the iovec based on the prdt
- */
- for (i = 0; i < iovcnt; i++) {
- uint32_t dbcsz;
+ if (ncq && first)
+ ahci_write_fis_d2h_ncq(p, slot);
- dbcsz = (prdt->dbc & DBCMASK) + 1;
- breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
- prdt->dba, dbcsz);
- breq->br_iov[i].iov_len = dbcsz;
- aior->done += dbcsz;
- prdt++;
- }
if (readop)
err = blockif_read(p->bctx, breq);
else
err = blockif_write(p->bctx, breq);
assert(err == 0);
-
- if (ncq)
- p->ci &= ~(1 << slot);
}
static void
@@ -650,7 +700,7 @@ ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
aior->slot = slot;
aior->len = 0;
aior->done = 0;
- aior->prdtl = 0;
+ aior->more = 0;
breq = &aior->io_req;
/*
@@ -703,15 +753,18 @@ ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done
uint8_t *entry;
uint64_t elba;
uint32_t len, elen;
- int err;
+ int err, first, ncq;
uint8_t buf[512];
+ first = (done == 0);
if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
len = (uint16_t)cfis[13] << 8 | cfis[12];
len *= 512;
+ ncq = 0;
} else { /* ATA_SEND_FPDMA_QUEUED */
len = (uint16_t)cfis[11] << 8 | cfis[3];
len *= 512;
+ ncq = 1;
}
read_prdt(p, slot, cfis, buf, sizeof(buf));
@@ -745,12 +798,11 @@ next:
aior->slot = slot;
aior->len = len;
aior->done = done;
- aior->prdtl = 0;
+ aior->more = (len != done);
breq = &aior->io_req;
breq->br_offset = elba * blockif_sectsz(p->bctx);
- breq->br_iovcnt = 1;
- breq->br_iov[0].iov_len = elen * blockif_sectsz(p->bctx);
+ breq->br_resid = elen * blockif_sectsz(p->bctx);
/*
* Mark this command in-flight.
@@ -762,6 +814,9 @@ next:
*/
TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+ if (ncq && first)
+ ahci_write_fis_d2h_ncq(p, slot);
+
err = blockif_delete(p->bctx, breq);
assert(err == 0);
}
@@ -903,7 +958,6 @@ handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
buf[88] = 0x7f;
if (p->xfermode & ATA_UDMA0)
buf[88] |= (1 << ((p->xfermode & 7) + 8));
- buf[93] = (1 | 1 <<14);
buf[100] = sectors;
buf[101] = (sectors >> 16);
buf[102] = (sectors >> 32);
@@ -1242,8 +1296,7 @@ atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
}
static void
-atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
- uint32_t done, int seek)
+atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
{
struct ahci_ioreq *aior;
struct ahci_cmd_hdr *hdr;
@@ -1253,14 +1306,13 @@ atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
uint8_t *acmd;
uint64_t lba;
uint32_t len;
- int i, err, iovcnt;
+ int err;
sc = p->pr_sc;
acmd = cfis + 0x40;
hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
- prdt += seek;
lba = be32dec(acmd + 2);
if (acmd[0] == READ_10)
len = be16dec(acmd + 7);
@@ -1285,37 +1337,14 @@ atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
aior->done = done;
breq = &aior->io_req;
breq->br_offset = lba + done;
- iovcnt = hdr->prdtl - seek;
- if (iovcnt > BLOCKIF_IOV_MAX) {
- aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
- iovcnt = BLOCKIF_IOV_MAX;
- } else
- aior->prdtl = 0;
- breq->br_iovcnt = iovcnt;
+ ahci_build_iov(p, aior, prdt, hdr->prdtl);
- /*
- * Mark this command in-flight.
- */
+ /* Mark this command in-flight. */
p->pending |= 1 << slot;
- /*
- * Stuff request onto busy list
- */
+ /* Stuff request onto busy list. */
TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
- /*
- * Build up the iovec based on the prdt
- */
- for (i = 0; i < iovcnt; i++) {
- uint32_t dbcsz;
-
- dbcsz = (prdt->dbc & DBCMASK) + 1;
- breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
- prdt->dba, dbcsz);
- breq->br_iov[i].iov_len = dbcsz;
- aior->done += dbcsz;
- prdt++;
- }
err = blockif_read(p->bctx, breq);
assert(err == 0);
}
@@ -1515,7 +1544,7 @@ handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
break;
case READ_10:
case READ_12:
- atapi_read(p, slot, cfis, 0, 0);
+ atapi_read(p, slot, cfis, 0);
break;
case REQUEST_SENSE:
atapi_request_sense(p, slot, cfis);
@@ -1543,6 +1572,7 @@ static void
ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
{
+ p->tfd |= ATA_S_BUSY;
switch (cfis[2]) {
case ATA_ATA_IDENTIFY:
handle_identify(p, slot, cfis);
@@ -1614,7 +1644,7 @@ ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
case ATA_WRITE_DMA48:
case ATA_READ_FPDMA_QUEUED:
case ATA_WRITE_FPDMA_QUEUED:
- ahci_handle_dma(p, slot, cfis, 0, 0);
+ ahci_handle_rw(p, slot, cfis, 0);
break;
case ATA_FLUSHCACHE:
case ATA_FLUSHCACHE48:
@@ -1755,7 +1785,7 @@ ata_ioreq_cb(struct blockif_req *br, int err)
struct pci_ahci_softc *sc;
uint32_t tfd;
uint8_t *cfis;
- int pending, slot, ncq, dsm;
+ int slot, ncq, dsm;
DPRINTF("%s %d\n", __func__, err);
@@ -1764,7 +1794,6 @@ ata_ioreq_cb(struct blockif_req *br, int err)
p = aior->io_pr;
cfis = aior->cfis;
slot = aior->slot;
- pending = aior->prdtl;
sc = p->pr_sc;
hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
@@ -1792,25 +1821,18 @@ ata_ioreq_cb(struct blockif_req *br, int err)
if (!err)
hdr->prdbc = aior->done;
- if (dsm) {
- if (aior->done != aior->len && !err) {
+ if (!err && aior->more) {
+ if (dsm)
ahci_handle_dsm_trim(p, slot, cfis, aior->done);
- goto out;
- }
- } else {
- if (pending && !err) {
- ahci_handle_dma(p, slot, cfis, aior->done,
- hdr->prdtl - pending);
- goto out;
- }
+ else
+ ahci_handle_rw(p, slot, cfis, aior->done);
+ goto out;
}
- if (!err && aior->done == aior->len) {
+ if (!err)
tfd = ATA_S_READY | ATA_S_DSC;
- } else {
+ else
tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
- }
-
if (ncq)
ahci_write_fis_sdb(p, slot, cfis, tfd);
else
@@ -1836,7 +1858,7 @@ atapi_ioreq_cb(struct blockif_req *br, int err)
struct pci_ahci_softc *sc;
uint8_t *cfis;
uint32_t tfd;
- int pending, slot;
+ int slot;
DPRINTF("%s %d\n", __func__, err);
@@ -1844,7 +1866,6 @@ atapi_ioreq_cb(struct blockif_req *br, int err)
p = aior->io_pr;
cfis = aior->cfis;
slot = aior->slot;
- pending = aior->prdtl;
sc = p->pr_sc;
hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
@@ -1863,19 +1884,18 @@ atapi_ioreq_cb(struct blockif_req *br, int err)
if (!err)
hdr->prdbc = aior->done;
- if (pending && !err) {
- atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
+ if (!err && aior->more) {
+ atapi_read(p, slot, cfis, aior->done);
goto out;
}
- if (!err && aior->done == aior->len) {
+ if (!err) {
tfd = ATA_S_READY | ATA_S_DSC;
} else {
p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
p->asc = 0x21;
tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
}
-
cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
ahci_write_fis_d2h(p, slot, cfis, tfd);
@@ -1949,8 +1969,15 @@ pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
break;
case AHCI_P_CMD:
{
- p->cmd = value;
-
+ p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
+ AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
+ AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
+ AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
+ p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
+ AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
+ AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
+ AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
+
if (!(value & AHCI_P_CMD_ST)) {
ahci_port_stop(p);
} else {
@@ -1978,6 +2005,10 @@ pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
p->cmd &= ~AHCI_P_CMD_CLO;
}
+ if (value & AHCI_P_CMD_ICC_MASK) {
+ p->cmd &= ~AHCI_P_CMD_ICC_MASK;
+ }
+
ahci_handle_port(p);
break;
}
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
index 6b906ed..03ff0c0 100644
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -59,17 +59,6 @@ __FBSDID("$FreeBSD$");
#define CONF1_ENABLE 0x80000000ul
-#define CFGWRITE(pi,off,val,b) \
-do { \
- if ((b) == 1) { \
- pci_set_cfgdata8((pi),(off),(val)); \
- } else if ((b) == 2) { \
- pci_set_cfgdata16((pi),(off),(val)); \
- } else { \
- pci_set_cfgdata32((pi),(off),(val)); \
- } \
-} while (0)
-
#define MAXBUSES (PCI_BUSMAX + 1)
#define MAXSLOTS (PCI_SLOTMAX + 1)
#define MAXFUNCS (PCI_FUNCMAX + 1)
@@ -124,6 +113,30 @@ static void pci_lintr_update(struct pci_devinst *pi);
static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot,
int func, int coff, int bytes, uint32_t *val);
+static __inline void
+CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes)
+{
+
+ if (bytes == 1)
+ pci_set_cfgdata8(pi, coff, val);
+ else if (bytes == 2)
+ pci_set_cfgdata16(pi, coff, val);
+ else
+ pci_set_cfgdata32(pi, coff, val);
+}
+
+static __inline uint32_t
+CFGREAD(struct pci_devinst *pi, int coff, int bytes)
+{
+
+ if (bytes == 1)
+ return (pci_get_cfgdata8(pi, coff));
+ else if (bytes == 2)
+ return (pci_get_cfgdata16(pi, coff));
+ else
+ return (pci_get_cfgdata32(pi, coff));
+}
+
/*
* I/O access
*/
@@ -1653,27 +1666,31 @@ pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
}
}
-static uint32_t
-bits_changed(uint32_t old, uint32_t new, uint32_t mask)
-{
-
- return ((old ^ new) & mask);
-}
-
static void
-pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes)
+pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes)
{
- int i;
- uint16_t old;
+ int i, rshift;
+ uint32_t cmd, cmd2, changed, old, readonly;
+
+ cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */
/*
- * The command register is at an offset of 4 bytes and thus the
- * guest could write 1, 2 or 4 bytes starting at this offset.
+ * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3.
+ *
+ * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are
+ * 'write 1 to clear'. However these bits are not set to '1' by
+ * any device emulation so it is simpler to treat them as readonly.
*/
+ rshift = (coff & 0x3) * 8;
+ readonly = 0xFFFFF880 >> rshift;
- old = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */
- CFGWRITE(pi, PCIR_COMMAND, new, bytes); /* update config */
- new = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */
+ old = CFGREAD(pi, coff, bytes);
+ new &= ~readonly;
+ new |= (old & readonly);
+ CFGWRITE(pi, coff, new, bytes); /* update config */
+
+ cmd2 = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */
+ changed = cmd ^ cmd2;
/*
* If the MMIO or I/O address space decoding has changed then
@@ -1686,7 +1703,7 @@ pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes)
break;
case PCIBAR_IO:
/* I/O address space decoding changed? */
- if (bits_changed(old, new, PCIM_CMD_PORTEN)) {
+ if (changed & PCIM_CMD_PORTEN) {
if (porten(pi))
register_bar(pi, i);
else
@@ -1696,7 +1713,7 @@ pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes)
case PCIBAR_MEM32:
case PCIBAR_MEM64:
/* MMIO address space decoding changed? */
- if (bits_changed(old, new, PCIM_CMD_MEMEN)) {
+ if (changed & PCIM_CMD_MEMEN) {
if (memen(pi))
register_bar(pi, i);
else
@@ -1776,14 +1793,8 @@ pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func,
needcfg = 1;
}
- if (needcfg) {
- if (bytes == 1)
- *eax = pci_get_cfgdata8(pi, coff);
- else if (bytes == 2)
- *eax = pci_get_cfgdata16(pi, coff);
- else
- *eax = pci_get_cfgdata32(pi, coff);
- }
+ if (needcfg)
+ *eax = CFGREAD(pi, coff, bytes);
pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax);
} else {
@@ -1853,8 +1864,8 @@ pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func,
} else if (pci_emul_iscap(pi, coff)) {
pci_emul_capwrite(pi, coff, bytes, *eax);
- } else if (coff == PCIR_COMMAND) {
- pci_emul_cmdwrite(pi, *eax, bytes);
+ } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) {
+ pci_emul_cmdsts_write(pi, coff, *eax, bytes);
} else {
CFGWRITE(pi, coff, *eax, bytes);
}
@@ -1927,7 +1938,7 @@ INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
#define DMEMSZ 4096
struct pci_emul_dsoftc {
uint8_t ioregs[DIOSZ];
- uint8_t memregs[DMEMSZ];
+ uint8_t memregs[2][DMEMSZ];
};
#define PCI_EMUL_MSI_MSGS 4
@@ -1956,6 +1967,9 @@ pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ);
assert(error == 0);
+ error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ);
+ assert(error == 0);
+
return (0);
}
@@ -1995,21 +2009,23 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
}
}
- if (baridx == 1) {
+ if (baridx == 1 || baridx == 2) {
if (offset + size > DMEMSZ) {
printf("diow: memw too large, offset %ld size %d\n",
offset, size);
return;
}
+ i = baridx - 1; /* 'memregs' index */
+
if (size == 1) {
- sc->memregs[offset] = value;
+ sc->memregs[i][offset] = value;
} else if (size == 2) {
- *(uint16_t *)&sc->memregs[offset] = value;
+ *(uint16_t *)&sc->memregs[i][offset] = value;
} else if (size == 4) {
- *(uint32_t *)&sc->memregs[offset] = value;
+ *(uint32_t *)&sc->memregs[i][offset] = value;
} else if (size == 8) {
- *(uint64_t *)&sc->memregs[offset] = value;
+ *(uint64_t *)&sc->memregs[i][offset] = value;
} else {
printf("diow: memw unknown size %d\n", size);
}
@@ -2019,7 +2035,7 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
*/
}
- if (baridx > 1) {
+ if (baridx > 2) {
printf("diow: unknown bar idx %d\n", baridx);
}
}
@@ -2030,6 +2046,7 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
{
struct pci_emul_dsoftc *sc = pi->pi_arg;
uint32_t value;
+ int i;
if (baridx == 0) {
if (offset + size > DIOSZ) {
@@ -2048,29 +2065,31 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
printf("dior: ior unknown size %d\n", size);
}
}
-
- if (baridx == 1) {
+
+ if (baridx == 1 || baridx == 2) {
if (offset + size > DMEMSZ) {
printf("dior: memr too large, offset %ld size %d\n",
offset, size);
return (0);
}
-
+
+ i = baridx - 1; /* 'memregs' index */
+
if (size == 1) {
- value = sc->memregs[offset];
+ value = sc->memregs[i][offset];
} else if (size == 2) {
- value = *(uint16_t *) &sc->memregs[offset];
+ value = *(uint16_t *) &sc->memregs[i][offset];
} else if (size == 4) {
- value = *(uint32_t *) &sc->memregs[offset];
+ value = *(uint32_t *) &sc->memregs[i][offset];
} else if (size == 8) {
- value = *(uint64_t *) &sc->memregs[offset];
+ value = *(uint64_t *) &sc->memregs[i][offset];
} else {
printf("dior: ior unknown size %d\n", size);
}
}
- if (baridx > 1) {
+ if (baridx > 2) {
printf("dior: unknown bar idx %d\n", baridx);
return (0);
}
diff --git a/usr.sbin/bhyve/pci_irq.c b/usr.sbin/bhyve/pci_irq.c
index 20e033f..f22b15c 100644
--- a/usr.sbin/bhyve/pci_irq.c
+++ b/usr.sbin/bhyve/pci_irq.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014 Advanced Computing Technologies LLC
+ * Copyright (c) 2014 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/usr.sbin/bhyve/pci_irq.h b/usr.sbin/bhyve/pci_irq.h
index 9d331a5..24f9c99 100644
--- a/usr.sbin/bhyve/pci_irq.h
+++ b/usr.sbin/bhyve/pci_irq.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014 Advanced Computing Technologies LLC
+ * Copyright (c) 2014 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c
index 967b528..35daa1f 100644
--- a/usr.sbin/bhyve/pci_virtio_block.c
+++ b/usr.sbin/bhyve/pci_virtio_block.c
@@ -198,7 +198,7 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
struct pci_vtblk_ioreq *io;
int i, n;
int err;
- int iolen;
+ ssize_t iolen;
int writeop, type;
off_t offset;
struct iovec iov[BLOCKIF_IOV_MAX + 2];
@@ -246,8 +246,9 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop);
iolen += iov[i].iov_len;
}
+ io->io_req.br_resid = iolen;
- DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r",
+ DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld\n\r",
writeop ? "write" : "read/ident", iolen, i - 1, offset));
switch (type) {
diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c
index 457a405..7227488 100644
--- a/usr.sbin/bhyve/pci_virtio_net.c
+++ b/usr.sbin/bhyve/pci_virtio_net.c
@@ -393,6 +393,7 @@ pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
*/
if (sc->vsc_rx_ready == 0) {
sc->vsc_rx_ready = 1;
+ vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
}
}
@@ -438,6 +439,7 @@ pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
/* Signal the tx thread for processing */
pthread_mutex_lock(&sc->tx_mtx);
+ vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
if (sc->tx_in_progress == 0)
pthread_cond_signal(&sc->tx_cond);
pthread_mutex_unlock(&sc->tx_mtx);
@@ -466,6 +468,7 @@ pci_vtnet_tx_thread(void *param)
for (;;) {
/* note - tx mutex is locked here */
do {
+ vq->vq_used->vu_flags &= ~VRING_USED_F_NO_NOTIFY;
if (sc->resetting)
have_work = 0;
else
@@ -478,6 +481,7 @@ pci_vtnet_tx_thread(void *param)
assert(error == 0);
}
} while (!have_work);
+ vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
sc->tx_in_progress = 1;
pthread_mutex_unlock(&sc->tx_mtx);
@@ -640,8 +644,8 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
- /* link always up */
- sc->vsc_config.status = 1;
+ /* Link is up if we managed to open tap device. */
+ sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0);
/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
diff --git a/usr.sbin/bhyve/pm.c b/usr.sbin/bhyve/pm.c
index f5a2d43..f7c1c23 100644
--- a/usr.sbin/bhyve/pm.c
+++ b/usr.sbin/bhyve/pm.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2013 Advanced Computing Technologies LLC
+ * Copyright (c) 2013 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
OpenPOWER on IntegriCloud