summaryrefslogtreecommitdiffstats
path: root/usr.sbin
diff options
context:
space:
mode:
author gnn <gnn@FreeBSD.org> 2016-01-09 03:08:21 +0000
committer gnn <gnn@FreeBSD.org> 2016-01-09 03:08:21 +0000
commit cac3f209134f9f95a431a8480d1275c640d86d7d (patch)
tree c9f4239d26896f6650a414c0e198e1cab7088454 /usr.sbin
parent e38d62e90de5adf82f18a626067916711d8f233d (diff)
download FreeBSD-src-cac3f209134f9f95a431a8480d1275c640d86d7d.zip
FreeBSD-src-cac3f209134f9f95a431a8480d1275c640d86d7d.tar.gz
Add netmap support for bhyve
Submitted by: btw
MFC after: 1 week
Differential Revision: https://reviews.freebsd.org/D4826
Diffstat (limited to 'usr.sbin')
-rw-r--r--usr.sbin/bhyve/pci_virtio_net.c305
1 files changed, 273 insertions, 32 deletions
diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c
index aa9b581..a8c5621 100644
--- a/usr.sbin/bhyve/pci_virtio_net.c
+++ b/usr.sbin/bhyve/pci_virtio_net.c
@@ -36,6 +36,10 @@ __FBSDID("$FreeBSD$");
#include <sys/ioctl.h>
#include <machine/atomic.h>
#include <net/ethernet.h>
+#ifndef NETMAP_WITH_LIBS
+#define NETMAP_WITH_LIBS
+#endif
+#include <net/netmap_user.h>
#include <errno.h>
#include <fcntl.h>
@@ -133,6 +137,8 @@ struct pci_vtnet_softc {
struct mevent *vsc_mevp;
int vsc_tapfd;
+ struct nm_desc *vsc_nmd;
+
int vsc_rx_ready;
volatile int resetting; /* set and checked outside lock */
@@ -149,6 +155,10 @@ struct pci_vtnet_softc {
pthread_mutex_t tx_mtx;
pthread_cond_t tx_cond;
int tx_in_progress;
+
+ void (*pci_vtnet_rx)(struct pci_vtnet_softc *sc);
+ void (*pci_vtnet_tx)(struct pci_vtnet_softc *sc, struct iovec *iov,
+ int iovcnt, int len);
};
static void pci_vtnet_reset(void *);
@@ -371,14 +381,208 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
vq_endchains(vq, 1);
}
+/*
+ * Copy one packet, described by the iovec array, into a single slot of a
+ * netmap TX ring and issue NIOCTXSYNC on the descriptor.
+ *
+ * The search for a ring with a free slot starts at nmd->cur_tx_ring and
+ * wraps around the [first_tx_ring, last_tx_ring] range.  The packet is
+ * dropped if the search comes back to its starting ring without finding
+ * a free slot, or if the packet does not fit into one netmap buffer.
+ *
+ * Returns the number of bytes placed in the ring, or 0 if the packet was
+ * dropped.
+ */
+static int
+pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+{
+	int r, i;
+	int len = 0;
+
+	for (r = nmd->cur_tx_ring; ; ) {
+		struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r);
+		uint32_t cur, idx;
+		size_t total;
+		char *buf;
+
+		if (nm_ring_empty(ring)) {
+			r++;
+			if (r > nmd->last_tx_ring)
+				r = nmd->first_tx_ring;
+			/*
+			 * Stop once every TX ring has been tried.  The scan
+			 * started at cur_tx_ring, so that is the cursor to
+			 * compare against; comparing against cur_rx_ring can
+			 * end the scan too early or never end it.
+			 */
+			if (r == nmd->cur_tx_ring)
+				break;
+			continue;
+		}
+
+		/*
+		 * The whole packet goes into one slot, so refuse anything
+		 * larger than a netmap buffer rather than writing past it.
+		 * No slot is consumed for a dropped packet.
+		 */
+		total = 0;
+		for (i = 0; i < iovcnt; i++)
+			total += iov[i].iov_len;
+		if (total > ring->nr_buf_size)
+			break;
+
+		cur = ring->cur;
+		idx = ring->slot[cur].buf_idx;
+		buf = NETMAP_BUF(ring, idx);
+
+		for (i = 0; i < iovcnt; i++) {
+			memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len);
+			len += iov[i].iov_len;
+		}
+		ring->slot[cur].len = len;
+		ring->head = ring->cur = nm_ring_next(ring, cur);
+		/* Remember the ring that worked as the next starting point. */
+		nmd->cur_tx_ring = r;
+		ioctl(nmd->fd, NIOCTXSYNC, NULL);
+		break;
+	}
+
+	return (len);
+}
+
+/*
+ * Copy at most one packet from a netmap RX ring into the iovec array.
+ *
+ * The search for a non-empty ring starts at nmd->cur_rx_ring and wraps
+ * around the [first_rx_ring, last_rx_ring] range; it gives up when it
+ * returns to the starting ring.  iov_len of each segment is rewritten to
+ * the number of bytes actually stored in it, and segments that receive
+ * no data get iov_len 0.
+ *
+ * Returns the number of bytes copied, or 0 if every ring was empty.
+ */
+static inline int
+pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+{
+	struct netmap_ring *rxr;
+	struct netmap_slot *slot;
+	char *src;
+	size_t remaining;
+	uint32_t head;
+	int copied, seg, ringno;
+
+	copied = 0;
+	seg = 0;
+	ringno = nmd->cur_rx_ring;
+
+	/* Phase 1: locate a ring that has a packet pending. */
+	for (;;) {
+		rxr = NETMAP_RXRING(nmd->nifp, ringno);
+		if (!nm_ring_empty(rxr))
+			break;
+		ringno++;
+		if (ringno > nmd->last_rx_ring)
+			ringno = nmd->first_rx_ring;
+		if (ringno == nmd->cur_rx_ring) {
+			/* Wrapped all the way around: nothing to read. */
+			rxr = NULL;
+			break;
+		}
+	}
+
+	/* Phase 2: scatter the slot's payload across the segments. */
+	if (rxr != NULL) {
+		head = rxr->cur;
+		slot = &rxr->slot[head];
+		src = NETMAP_BUF(rxr, slot->buf_idx);
+		remaining = slot->len;
+
+		while (seg < iovcnt && remaining > 0) {
+			/* Shrink the segment to what is left of the packet. */
+			if (iov[seg].iov_len > remaining)
+				iov[seg].iov_len = remaining;
+			memcpy(iov[seg].iov_base, &src[copied],
+			    iov[seg].iov_len);
+			copied += iov[seg].iov_len;
+			remaining -= iov[seg].iov_len;
+			seg++;
+		}
+
+		/* Advance past the slot and sync the ring via ioctl. */
+		rxr->head = rxr->cur = nm_ring_next(rxr, head);
+		nmd->cur_rx_ring = ringno;
+		ioctl(nmd->fd, NIOCRXSYNC, NULL);
+	}
+
+	/* Mark the segments that received nothing as empty. */
+	while (seg < iovcnt) {
+		iov[seg].iov_len = 0;
+		seg++;
+	}
+
+	return (copied);
+}
+
+/*
+ * Transmit a buffer chain on the netmap (vale) port.
+ *
+ * Does nothing when no netmap descriptor is attached.  Packets shorter
+ * than 60 bytes are padded with zero bytes up to that length.
+ */
+static void
+pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
+    int len)
+{
+	static char pad[60];	/* never written, so it stays all zero */
+	int nsegs = iovcnt;
+
+	if (sc->vsc_nmd != NULL) {
+		if (len < 60) {
+			/*
+			 * Append the padding as one more segment.  The
+			 * caller is required to leave a spare iov entry
+			 * after the chain for this purpose.
+			 */
+			iov[nsegs].iov_base = pad;
+			iov[nsegs].iov_len = 60 - len;
+			nsegs++;
+		}
+		(void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, nsegs);
+	}
+}
+
static void
-pci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
+pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc)
+{
+ struct iovec iov[VTNET_MAXSEGS], *riov;
+ struct vqueue_info *vq;
+ void *vrx;
+ int len, n;
+ uint16_t idx;
+
+ /*
+ * Receive path for the netmap backend: moves pending packets from
+ * the netmap descriptor into guest rx descriptor chains, one packet
+ * per chain, until either side runs out.
+ *
+ * Should never be called without a valid netmap descriptor
+ */
+ assert(sc->vsc_nmd != NULL);
+
+ /*
+ * But, will be called when the rx ring hasn't yet
+ * been set up or the guest is resetting the device.
+ */
+ if (!sc->vsc_rx_ready || sc->resetting) {
+ /*
+ * Drop the packet and try later. The packet is consumed
+ * with nm_nextpkt() and its result is discarded.
+ */
+ (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+ return;
+ }
+
+ /*
+ * Check for available rx buffers
+ */
+ vq = &sc->vsc_queues[VTNET_RXQ];
+ if (!vq_has_descs(vq)) {
+ /*
+ * Drop the packet and try later. Interrupt on
+ * empty, if that's negotiated.
+ */
+ (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+ vq_endchains(vq, 1);
+ return;
+ }
+
+ do {
+ /*
+ * Get descriptor chain.
+ */
+ n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
+ assert(n >= 1 && n <= VTNET_MAXSEGS);
+
+ /*
+ * Get a pointer to the rx header, and use the
+ * data immediately following it for the packet buffer.
+ */
+ vrx = iov[0].iov_base;
+ riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
+
+ len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n);
+
+ if (len == 0) {
+ /*
+ * No more packets, but still some avail ring
+ * entries. Interrupt if needed/appropriate.
+ *
+ * NOTE(review): the chain fetched by vq_getchain()
+ * above is not released with vq_relchain() before
+ * this return - confirm that the descriptor is not
+ * lost to the guest.
+ */
+ vq_endchains(vq, 0);
+ return;
+ }
+
+ /*
+ * The only valid field in the rx packet header is the
+ * number of buffers if merged rx bufs were negotiated.
+ * Everything else in the header is zeroed.
+ */
+ memset(vrx, 0, sc->rx_vhdrlen);
+
+ if (sc->rx_merge) {
+ struct virtio_net_rxhdr *vrxh;
+
+ vrxh = vrx;
+ vrxh->vrh_bufs = 1;
+ }
+
+ /*
+ * Release this chain and handle more chains. The reported
+ * length covers the rx header plus the packet data.
+ */
+ vq_relchain(vq, idx, len + sc->rx_vhdrlen);
+ } while (vq_has_descs(vq));
+
+ /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
+ vq_endchains(vq, 1);
+}
+
+static void
+pci_vtnet_rx_callback(int fd, enum ev_type type, void *param)
{
struct pci_vtnet_softc *sc = param;
pthread_mutex_lock(&sc->rx_mtx);
sc->rx_in_progress = 1;
- pci_vtnet_tap_rx(sc);
+ sc->pci_vtnet_rx(sc);
sc->rx_in_progress = 0;
pthread_mutex_unlock(&sc->rx_mtx);
@@ -421,7 +625,7 @@ pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
}
DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n));
- pci_vtnet_tap_tx(sc, &iov[1], n - 1, plen);
+ sc->pci_vtnet_tx(sc, &iov[1], n - 1, plen);
/* chain is processed, release it and set tlen */
vq_relchain(vq, idx, tlen);
@@ -532,6 +736,67 @@ pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr)
return (0);
}
+/*
+ * Attach the device to a tap backend.
+ *
+ * Installs the tap rx/tx handlers, opens /dev/<devname>, switches the
+ * descriptor to non-blocking mode and registers it for read events.
+ * On any failure a warning is printed, the descriptor is closed (if it
+ * was opened) and sc->vsc_tapfd is left at -1.
+ */
+static void
+pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname)
+{
+	char tbuf[80];
+	int opt = 1;
+
+	strcpy(tbuf, "/dev/");
+	strlcat(tbuf, devname, sizeof(tbuf));
+
+	sc->pci_vtnet_rx = pci_vtnet_tap_rx;
+	sc->pci_vtnet_tx = pci_vtnet_tap_tx;
+
+	sc->vsc_tapfd = open(tbuf, O_RDWR);
+	if (sc->vsc_tapfd == -1) {
+		WPRINTF(("open of tap device %s failed\n", tbuf));
+		return;
+	}
+
+	/*
+	 * Set non-blocking and register for read
+	 * notifications with the event loop
+	 */
+	if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
+		WPRINTF(("tap device O_NONBLOCK failed\n"));
+		close(sc->vsc_tapfd);
+		sc->vsc_tapfd = -1;
+		/*
+		 * Stop here: the descriptor is gone, so it must not be
+		 * handed to mevent_add() or closed a second time.
+		 */
+		return;
+	}
+
+	sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
+				  EVF_READ,
+				  pci_vtnet_rx_callback,
+				  sc);
+	if (sc->vsc_mevp == NULL) {
+		WPRINTF(("Could not register event\n"));
+		close(sc->vsc_tapfd);
+		sc->vsc_tapfd = -1;
+	}
+}
+
+/*
+ * Attach the device to a netmap backend.
+ *
+ * Installs the netmap rx/tx handlers, opens the port named by ifname
+ * with nm_open() and registers its descriptor for read events.  On
+ * failure a warning is printed and sc->vsc_nmd is left NULL.
+ */
+static void
+pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname)
+{
+	struct nm_desc *nmd;
+
+	sc->pci_vtnet_rx = pci_vtnet_netmap_rx;
+	sc->pci_vtnet_tx = pci_vtnet_netmap_tx;
+
+	nmd = nm_open(ifname, NULL, 0, 0);
+	sc->vsc_nmd = nmd;
+	if (nmd == NULL) {
+		WPRINTF(("open of netmap device %s failed\n", ifname));
+		return;
+	}
+
+	sc->vsc_mevp = mevent_add(nmd->fd, EVF_READ, pci_vtnet_rx_callback,
+	    sc);
+	if (sc->vsc_mevp != NULL)
+		return;
+
+	/* Registration failed: undo the open so the softc stays detached. */
+	WPRINTF(("Could not register event\n"));
+	nm_close(nmd);
+	sc->vsc_nmd = NULL;
+}
static int
pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
@@ -567,8 +832,8 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
*/
mac_provided = 0;
sc->vsc_tapfd = -1;
+ sc->vsc_nmd = NULL;
if (opts != NULL) {
- char tbuf[80];
int err;
devname = vtopts = strdup(opts);
@@ -583,36 +848,12 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
mac_provided = 1;
}
- strcpy(tbuf, "/dev/");
- strlcat(tbuf, devname, sizeof(tbuf));
+ if (strncmp(devname, "vale", 4) == 0)
+ pci_vtnet_netmap_setup(sc, devname);
+ if (strncmp(devname, "tap", 3) == 0)
+ pci_vtnet_tap_setup(sc, devname);
free(devname);
-
- sc->vsc_tapfd = open(tbuf, O_RDWR);
- if (sc->vsc_tapfd == -1) {
- WPRINTF(("open of tap device %s failed\n", tbuf));
- } else {
- /*
- * Set non-blocking and register for read
- * notifications with the event loop
- */
- int opt = 1;
- if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
- WPRINTF(("tap device O_NONBLOCK failed\n"));
- close(sc->vsc_tapfd);
- sc->vsc_tapfd = -1;
- }
-
- sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
- EVF_READ,
- pci_vtnet_tap_callback,
- sc);
- if (sc->vsc_mevp == NULL) {
- WPRINTF(("Could not register event\n"));
- close(sc->vsc_tapfd);
- sc->vsc_tapfd = -1;
- }
- }
}
/*
OpenPOWER on IntegriCloud