summaryrefslogtreecommitdiffstats
path: root/usr.sbin/bhyve/pci_virtio_block.c
diff options
context:
space:
mode:
authorgrehan <grehan@FreeBSD.org>2013-07-17 23:37:33 +0000
committergrehan <grehan@FreeBSD.org>2013-07-17 23:37:33 +0000
commita6cf66c6cfea279d740ce36eac27ac9ec27ae0cb (patch)
tree064903149f0c797df3873dc7997273f417c81f93 /usr.sbin/bhyve/pci_virtio_block.c
parentb8663d4c053e282b686f3e2a2d625b21b5944176 (diff)
downloadFreeBSD-src-a6cf66c6cfea279d740ce36eac27ac9ec27ae0cb.zip
FreeBSD-src-a6cf66c6cfea279d740ce36eac27ac9ec27ae0cb.tar.gz
Major rework of the virtio code. Split out common parts, and modify
the net/block devices accordingly.

Submitted by: Chris Torek (torek at torek dot net)
Reviewed by: grehan
Diffstat (limited to 'usr.sbin/bhyve/pci_virtio_block.c')
-rw-r--r--usr.sbin/bhyve/pci_virtio_block.c475
1 files changed, 89 insertions, 386 deletions
diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c
index 5c42dc2..4395410 100644
--- a/usr.sbin/bhyve/pci_virtio_block.c
+++ b/usr.sbin/bhyve/pci_virtio_block.c
@@ -53,14 +53,6 @@ __FBSDID("$FreeBSD$");
#define VTBLK_RINGSZ 64
-#define VTBLK_CFGSZ 28
-
-#define VTBLK_R_CFG VTCFG_R_CFG1
-#define VTBLK_R_CFG_END VTBLK_R_CFG + VTBLK_CFGSZ -1
-#define VTBLK_R_MAX VTBLK_R_CFG_END
-
-#define VTBLK_REGSZ VTBLK_R_MAX+1
-
#define VTBLK_MAXSEGS 32
#define VTBLK_S_OK 0
@@ -71,28 +63,10 @@ __FBSDID("$FreeBSD$");
*/
#define VTBLK_S_HOSTCAPS \
( 0x00000004 | /* host maximum request segments */ \
- 0x10000000 ) /* supports indirect descriptors */
-
-static int use_msix = 1;
-
-struct vring_hqueue {
- /* Internal state */
- uint16_t hq_size;
- uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */
-
- /* Host-context pointers to the queue */
- struct virtio_desc *hq_dtable;
- uint16_t *hq_avail_flags;
- uint16_t *hq_avail_idx; /* monotonically increasing */
- uint16_t *hq_avail_ring;
-
- uint16_t *hq_used_flags;
- uint16_t *hq_used_idx; /* monotonically increasing */
- struct virtio_used *hq_used_ring;
-};
+ VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */
/*
- * Config space
+ * Config space "registers"
*/
struct vtblk_config {
uint64_t vbc_capacity;
@@ -104,7 +78,6 @@ struct vtblk_config {
uint32_t vbc_blk_size;
uint32_t vbc_sectors_max;
} __packed;
-CTASSERT(sizeof(struct vtblk_config) == VTBLK_CFGSZ);
/*
* Fixed-size block header
@@ -129,113 +102,69 @@ static int pci_vtblk_debug;
* Per-device softc
*/
struct pci_vtblk_softc {
- struct pci_devinst *vbsc_pi;
+ struct virtio_softc vbsc_vs;
+ struct vqueue_info vbsc_vq;
int vbsc_fd;
- int vbsc_status;
- int vbsc_isr;
- int vbsc_lastq;
- uint32_t vbsc_features;
- uint64_t vbsc_pfn;
- struct vring_hqueue vbsc_q;
struct vtblk_config vbsc_cfg;
- uint16_t msix_table_idx_req;
- uint16_t msix_table_idx_cfg;
};
-#define vtblk_ctx(sc) ((sc)->vbsc_pi->pi_vmctx)
-
-/*
- * Return the size of IO BAR that maps virtio header and device specific
- * region. The size would vary depending on whether MSI-X is enabled or
- * not
- */
-static uint64_t
-pci_vtblk_iosize(struct pci_devinst *pi)
-{
-
- if (pci_msix_enabled(pi))
- return (VTBLK_REGSZ);
- else
- return (VTBLK_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX));
-}
-
-/*
- * Return the number of available descriptors in the vring taking care
- * of the 16-bit index wraparound.
- */
-static int
-hq_num_avail(struct vring_hqueue *hq)
-{
- uint16_t ndesc;
-
- /*
- * We're just computing (a-b) in GF(2^16).
- *
- * The only glitch here is that in standard C,
- * uint16_t promotes to (signed) int when int has
- * more than 16 bits (pretty much always now), so
- * we have to force it back to unsigned.
- */
- ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx;
-
- assert(ndesc <= hq->hq_size);
- return (ndesc);
-}
+static void pci_vtblk_reset(void *);
+static void pci_vtblk_notify(void *, struct vqueue_info *);
+static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
+static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
+
+static struct virtio_consts vtblk_vi_consts = { /* positional initializer: per-device-type constants and callbacks handed to vi_softc_linkup() */
+ "vtblk", /* device name */
+ 1, /* we support 1 virtqueue */
+ sizeof(struct vtblk_config), /* size of the device-specific config area (struct vtblk_config) */
+ pci_vtblk_reset, /* device reset callback */
+ pci_vtblk_notify, /* device-wide qnotify callback (no per-queue notify is installed) */
+ pci_vtblk_cfgread, /* read device-specific config (served from vbsc_cfg, not PCI config space) */
+ pci_vtblk_cfgwrite, /* write device-specific config (only logs and returns 1: registers are read-only) */
+ VTBLK_S_HOSTCAPS, /* host capability (feature) bits we offer */
+};
static void
-pci_vtblk_update_status(struct pci_vtblk_softc *sc, uint32_t value)
+pci_vtblk_reset(void *vsc) /* reset callback installed in vtblk_vi_consts */
{
- if (value == 0) {
- DPRINTF(("vtblk: device reset requested !\n"));
- sc->vbsc_isr = 0;
- sc->msix_table_idx_req = VIRTIO_MSI_NO_VECTOR;
- sc->msix_table_idx_cfg = VIRTIO_MSI_NO_VECTOR;
- sc->vbsc_features = 0;
- sc->vbsc_pfn = 0;
- sc->vbsc_lastq = 0;
- memset(&sc->vbsc_q, 0, sizeof(struct vring_hqueue));
- }
+ struct pci_vtblk_softc *sc = vsc; /* vsc: presumably the softc pointer given to vi_softc_linkup() -- confirm in virtio.c */
- sc->vbsc_status = value;
+ DPRINTF(("vtblk: device reset requested !\n"));
+ vi_reset_dev(&sc->vbsc_vs); /* isr/feature/pfn/MSI-X state no longer lives in this softc; the common virtio_softc is reset instead */
}
static void
-pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq)
+pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
{
- struct iovec iov[VTBLK_MAXSEGS];
struct virtio_blk_hdr *vbh;
- struct virtio_desc *vd, *vid;
- struct virtio_used *vu;
uint8_t *status;
- int i;
+ int i, n;
int err;
int iolen;
- int uidx, aidx, didx;
- int indirect, writeop, type;
+ int writeop, type;
off_t offset;
+ struct iovec iov[VTBLK_MAXSEGS + 2];
+ uint16_t flags[VTBLK_MAXSEGS + 2];
- uidx = *hq->hq_used_idx;
- aidx = hq->hq_cur_aidx;
- didx = hq->hq_avail_ring[aidx % hq->hq_size];
- assert(didx >= 0 && didx < hq->hq_size);
-
- vd = &hq->hq_dtable[didx];
-
- indirect = ((vd->vd_flags & VRING_DESC_F_INDIRECT) != 0);
-
- if (indirect) {
- vid = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr, vd->vd_len);
- vd = &vid[0];
- }
+ n = vq_getchain(vq, iov, VTBLK_MAXSEGS + 2, flags);
/*
- * The first descriptor will be the read-only fixed header
+ * The first descriptor will be the read-only fixed header,
+ * and the last is for status (hence +2 above and below).
+ * The remaining iov's are the actual data I/O vectors.
+ *
+ * XXX - note - this fails on crash dump, which does a
+ * VIRTIO_BLK_T_FLUSH with a zero transfer length
*/
- vbh = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr,
- sizeof(struct virtio_blk_hdr));
- assert(vd->vd_len == sizeof(struct virtio_blk_hdr));
- assert(vd->vd_flags & VRING_DESC_F_NEXT);
- assert((vd->vd_flags & VRING_DESC_F_WRITE) == 0);
+ assert (n >= 3 && n < VTBLK_MAXSEGS + 2);
+
+ assert((flags[0] & VRING_DESC_F_WRITE) == 0);
+ assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
+ vbh = iov[0].iov_base;
+
+ status = iov[--n].iov_base;
+ assert(iov[n].iov_len == 1);
+ assert(flags[n] & VRING_DESC_F_WRITE);
/*
* XXX
@@ -247,120 +176,44 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq)
offset = vbh->vbh_sector * DEV_BSIZE;
- /*
- * Build up the iovec based on the guest's data descriptors
- */
- i = iolen = 0;
- while (1) {
- if (indirect)
- vd = &vid[i + 1]; /* skip first indirect desc */
- else
- vd = &hq->hq_dtable[vd->vd_next];
-
- if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0)
- break;
-
- if (i == VTBLK_MAXSEGS)
- break;
-
+ iolen = 0;
+ for (i = 1; i < n; i++) {
/*
* - write op implies read-only descriptor,
* - read op implies write-only descriptor,
* therefore test the inverse of the descriptor bit
* to the op.
*/
- assert(((vd->vd_flags & VRING_DESC_F_WRITE) == 0) ==
- writeop);
-
- iov[i].iov_base = paddr_guest2host(vtblk_ctx(sc),
- vd->vd_addr,
- vd->vd_len);
- iov[i].iov_len = vd->vd_len;
- iolen += vd->vd_len;
- i++;
+ assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop);
+ iolen += iov[i].iov_len;
}
- /* Lastly, get the address of the status byte */
- status = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr, 1);
- assert(vd->vd_len == 1);
- assert((vd->vd_flags & VRING_DESC_F_NEXT) == 0);
- assert(vd->vd_flags & VRING_DESC_F_WRITE);
-
DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r",
- writeop ? "write" : "read", iolen, i, offset));
+ writeop ? "write" : "read", iolen, i - 1, offset));
if (writeop)
- err = pwritev(sc->vbsc_fd, iov, i, offset);
+ err = pwritev(sc->vbsc_fd, iov + 1, i - 1, offset);
else
- err = preadv(sc->vbsc_fd, iov, i, offset);
+ err = preadv(sc->vbsc_fd, iov + 1, i - 1, offset);
*status = err < 0 ? VTBLK_S_IOERR : VTBLK_S_OK;
/*
- * Return the single descriptor back to the host
+ * Return the descriptor chain back to the guest.
+ * We wrote 1 byte (our status) into guest memory.
*/
- vu = &hq->hq_used_ring[uidx % hq->hq_size];
- vu->vu_idx = didx;
- vu->vu_tlen = 1;
- hq->hq_cur_aidx++;
- *hq->hq_used_idx += 1;
-
- /*
- * Generate an interrupt if able
- */
- if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
- if (use_msix) {
- pci_generate_msix(sc->vbsc_pi, sc->msix_table_idx_req);
- } else if (sc->vbsc_isr == 0) {
- sc->vbsc_isr = 1;
- pci_generate_msi(sc->vbsc_pi, 0);
- }
- }
+ vq_relchain(vq, 1);
}
static void
-pci_vtblk_qnotify(struct pci_vtblk_softc *sc)
+pci_vtblk_notify(void *vsc, struct vqueue_info *vq) /* qnotify callback installed in vtblk_vi_consts */
{
- struct vring_hqueue *hq = &sc->vbsc_q;
- int ndescs;
+ struct pci_vtblk_softc *sc = vsc; /* vsc: presumably the softc pointer given to vi_softc_linkup() -- confirm in virtio.c */
- while ((ndescs = hq_num_avail(hq)) != 0) {
- /*
- * Run through all the entries, placing them into iovecs and
- * sending when an end-of-packet is found
- */
- pci_vtblk_proc(sc, hq);
- }
-}
-
-static void
-pci_vtblk_ring_init(struct pci_vtblk_softc *sc, uint64_t pfn)
-{
- struct vring_hqueue *hq;
-
- sc->vbsc_pfn = pfn << VRING_PFN;
-
- /*
- * Set up host pointers to the various parts of the
- * queue
- */
- hq = &sc->vbsc_q;
- hq->hq_size = VTBLK_RINGSZ;
-
- hq->hq_dtable = paddr_guest2host(vtblk_ctx(sc), pfn << VRING_PFN,
- vring_size(VTBLK_RINGSZ));
- hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size);
- hq->hq_avail_idx = hq->hq_avail_flags + 1;
- hq->hq_avail_ring = hq->hq_avail_flags + 2;
- hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
- VRING_ALIGN);
- hq->hq_used_idx = hq->hq_used_flags + 1;
- hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
-
- /*
- * Initialize queue indexes
- */
- hq->hq_cur_aidx = 0;
+ vq_startchains(vq); /* NOTE(review): paired with vq_endchains() below; exact semantics are in the common virtio code */
+ while (vq_has_descs(vq)) /* drain the queue: each pci_vtblk_proc() call takes and releases one request chain */
+ pci_vtblk_proc(sc, vq);
+ vq_endchains(vq, 1); /* Generate interrupt if appropriate. */
}
static int
@@ -371,6 +224,7 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
off_t size;
int fd;
int sectsz;
+ int use_msix;
const char *env_msi;
if (opts == NULL) {
@@ -412,10 +266,14 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
sc = malloc(sizeof(struct pci_vtblk_softc));
memset(sc, 0, sizeof(struct pci_vtblk_softc));
- pi->pi_arg = sc;
- sc->vbsc_pi = pi;
+ /* record fd of storage device/file */
sc->vbsc_fd = fd;
+ /* init virtio softc and virtqueues */
+ vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq);
+ sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
+ /* sc->vbsc_vq.vq_notify = we have no per-queue notify */
+
/* setup virtio block config space */
sc->vbsc_cfg.vbc_capacity = size / sectsz;
sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS;
@@ -426,206 +284,51 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
sc->vbsc_cfg.vbc_geom_s = 0;
sc->vbsc_cfg.vbc_sectors_max = 0;
- /* initialize config space */
+ /*
+ * Should we move some of this into virtio.c? Could
+ * have the device, class, and subdev_0 as fields in
+ * the virtio constants structure.
+ */
pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
+ use_msix = 1;
if ((env_msi = getenv("BHYVE_USE_MSI"))) {
if (strcasecmp(env_msi, "yes") == 0)
use_msix = 0;
}
-
- if (use_msix) {
- /* MSI-X Support */
- sc->msix_table_idx_req = VIRTIO_MSI_NO_VECTOR;
- sc->msix_table_idx_cfg = VIRTIO_MSI_NO_VECTOR;
-
- if (pci_emul_add_msixcap(pi, 2, 1))
- return (1);
- } else {
- /* MSI Support */
- pci_emul_add_msicap(pi, 1);
- }
-
- pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTBLK_REGSZ);
-
+ if (vi_intr_init(&sc->vbsc_vs, 1, use_msix))
+ return (1);
+ vi_set_io_bar(&sc->vbsc_vs, 0);
return (0);
}
-static uint64_t
-vtblk_adjust_offset(struct pci_devinst *pi, uint64_t offset)
-{
- /*
- * Device specific offsets used by guest would change
- * based on whether MSI-X capability is enabled or not
- */
- if (!pci_msix_enabled(pi)) {
- if (offset >= VTCFG_R_MSIX)
- return (offset + (VTCFG_R_CFG1 - VTCFG_R_MSIX));
- }
-
- return (offset);
-}
-
-static void
-pci_vtblk_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
- int baridx, uint64_t offset, int size, uint64_t value)
+static int
+pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value) /* config-write callback installed in vtblk_vi_consts */
{
- struct pci_vtblk_softc *sc = pi->pi_arg;
- if (use_msix) {
- if (baridx == pci_msix_table_bar(pi) ||
- baridx == pci_msix_pba_bar(pi)) {
- pci_emul_msix_twrite(pi, offset, size, value);
- return;
- }
- }
-
- assert(baridx == 0);
-
- if (offset + size > pci_vtblk_iosize(pi)) {
- DPRINTF(("vtblk_write: 2big, offset %ld size %d\n",
- offset, size));
- return;
- }
-
- offset = vtblk_adjust_offset(pi, offset);
-
- switch (offset) {
- case VTCFG_R_GUESTCAP:
- assert(size == 4);
- sc->vbsc_features = value & VTBLK_S_HOSTCAPS;
- break;
- case VTCFG_R_PFN:
- assert(size == 4);
- pci_vtblk_ring_init(sc, value);
- break;
- case VTCFG_R_QSEL:
- assert(size == 2);
- sc->vbsc_lastq = value;
- break;
- case VTCFG_R_QNOTIFY:
- assert(size == 2);
- assert(value == 0);
- pci_vtblk_qnotify(sc);
- break;
- case VTCFG_R_STATUS:
- assert(size == 1);
- pci_vtblk_update_status(sc, value);
- break;
- case VTCFG_R_CFGVEC:
- assert(size == 2);
- sc->msix_table_idx_cfg = value;
- break;
- case VTCFG_R_QVEC:
- assert(size == 2);
- sc->msix_table_idx_req = value;
- break;
- case VTCFG_R_HOSTCAP:
- case VTCFG_R_QNUM:
- case VTCFG_R_ISR:
- case VTBLK_R_CFG ... VTBLK_R_CFG_END:
- DPRINTF(("vtblk: write to readonly reg %ld\n\r", offset));
- break;
- default:
- DPRINTF(("vtblk: unknown i/o write offset %ld\n\r", offset));
- value = 0;
- break;
- }
+ DPRINTF(("vtblk: write to readonly reg %d\n\r", offset)); /* vsc, size and value are unused: no config field is ever modified */
+ return (1); /* NOTE(review): non-zero presumably reports the write as failed to the common virtio code -- confirm */
}
-uint64_t
-pci_vtblk_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
- int baridx, uint64_t offset, int size)
+static int
+pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval) /* config-read callback installed in vtblk_vi_consts */
{
- struct pci_vtblk_softc *sc = pi->pi_arg;
+ struct pci_vtblk_softc *sc = vsc; /* vsc: presumably the softc pointer given to vi_softc_linkup() -- confirm in virtio.c */
void *ptr;
- uint32_t value;
- if (use_msix) {
- if (baridx == pci_msix_table_bar(pi) ||
- baridx == pci_msix_pba_bar(pi)) {
- return (pci_emul_msix_tread(pi, offset, size));
- }
- }
-
- assert(baridx == 0);
-
- if (offset + size > pci_vtblk_iosize(pi)) {
- DPRINTF(("vtblk_read: 2big, offset %ld size %d\n",
- offset, size));
- return (0);
- }
-
- offset = vtblk_adjust_offset(pi, offset);
-
- switch (offset) {
- case VTCFG_R_HOSTCAP:
- assert(size == 4);
- value = VTBLK_S_HOSTCAPS;
- break;
- case VTCFG_R_GUESTCAP:
- assert(size == 4);
- value = sc->vbsc_features; /* XXX never read ? */
- break;
- case VTCFG_R_PFN:
- assert(size == 4);
- value = sc->vbsc_pfn >> VRING_PFN;
- break;
- case VTCFG_R_QNUM:
- value = (sc->vbsc_lastq == 0) ? VTBLK_RINGSZ: 0;
- break;
- case VTCFG_R_QSEL:
- assert(size == 2);
- value = sc->vbsc_lastq; /* XXX never read ? */
- break;
- case VTCFG_R_QNOTIFY:
- assert(size == 2);
- value = 0; /* XXX never read ? */
- break;
- case VTCFG_R_STATUS:
- assert(size == 1);
- value = sc->vbsc_status;
- break;
- case VTCFG_R_ISR:
- assert(size == 1);
- value = sc->vbsc_isr;
- sc->vbsc_isr = 0; /* a read clears this flag */
- break;
- case VTCFG_R_CFGVEC:
- assert(size == 2);
- value = sc->msix_table_idx_cfg;
- break;
- case VTCFG_R_QVEC:
- assert(size == 2);
- value = sc->msix_table_idx_req;
- break;
- case VTBLK_R_CFG ... VTBLK_R_CFG_END:
- assert(size + offset <= (VTBLK_R_CFG_END + 1));
- ptr = (uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG;
- if (size == 1) {
- value = *(uint8_t *) ptr;
- } else if (size == 2) {
- value = *(uint16_t *) ptr;
- } else {
- value = *(uint32_t *) ptr;
- }
- break;
- default:
- DPRINTF(("vtblk: unknown i/o read offset %ld\n\r", offset));
- value = 0;
- break;
- }
-
- return (value);
+ /* our caller has already verified offset and size */
+ ptr = (uint8_t *)&sc->vbsc_cfg + offset; /* offset is relative to the start of struct vtblk_config (no register base is subtracted) */
+ memcpy(retval, ptr, size); /* copies exactly 'size' bytes; any remaining bytes of *retval are left as the caller set them */
+ return (0); /* NOTE(review): 0 presumably reports success to the common virtio code -- confirm */
}
struct pci_devemu pci_de_vblk = { /* PCI device-emulation descriptor for the virtio block device */
.pe_emu = "virtio-blk", /* emulation name */
.pe_init = pci_vtblk_init, /* per-instance initialization */
- .pe_barwrite = pci_vtblk_write,
- .pe_barread = pci_vtblk_read
+ .pe_barwrite = vi_pci_write, /* BAR writes now go to the shared vi_pci_write instead of a device-local handler */
+ .pe_barread = vi_pci_read /* BAR reads likewise go to the shared vi_pci_read */
};
PCI_EMUL_SET(pci_de_vblk);
OpenPOWER on IntegriCloud