summary | refs | log | tree | commit | diff | stats
path: root/tools
diff options
context:
space:
mode:
author luigi <luigi@FreeBSD.org> 2014-02-18 05:01:04 +0000
committer luigi <luigi@FreeBSD.org> 2014-02-18 05:01:04 +0000
commit 5bacc3bb87b954978543b0d82a4d5705e33f5c06 (patch)
tree a79f129924ca9cf087c1e108d2d184a16ac1e42b /tools
parent dd5bb071cd203986ef23e5ceecdcef3cea848542 (diff)
download FreeBSD-src-5bacc3bb87b954978543b0d82a4d5705e33f5c06.zip
download FreeBSD-src-5bacc3bb87b954978543b0d82a4d5705e33f5c06.tar.gz
MFH: sync the netmap code with the one in HEAD
(enhanced VALE switch, netmap pipes, emulated netmap mode). See details in the log for svn 261909.
Diffstat (limited to 'tools')
-rw-r--r-- tools/tools/netmap/Makefile 28
-rw-r--r-- tools/tools/netmap/README 17
-rw-r--r-- tools/tools/netmap/bridge.c 203
-rw-r--r-- tools/tools/netmap/click-test.cfg 19
-rw-r--r-- tools/tools/netmap/nm_util.c 250
-rw-r--r-- tools/tools/netmap/nm_util.h 183
-rw-r--r-- tools/tools/netmap/pcap.c 654
-rw-r--r-- tools/tools/netmap/pkt-gen.c 1064
-rw-r--r-- tools/tools/netmap/vale-ctl.c 43
9 files changed, 803 insertions, 1658 deletions
diff --git a/tools/tools/netmap/Makefile b/tools/tools/netmap/Makefile
index d737bac..c502473 100644
--- a/tools/tools/netmap/Makefile
+++ b/tools/tools/netmap/Makefile
@@ -3,26 +3,30 @@
#
# For multiple programs using a single source file each,
# we can just define 'progs' and create custom targets.
-PROGS = pkt-gen bridge vale-ctl testpcap libnetmap.so
+PROGS = pkt-gen bridge vale-ctl
-CLEANFILES = $(PROGS) pcap.o nm_util.o
+CLEANFILES = $(PROGS) *.o
NO_MAN=
-CFLAGS += -Werror -Wall -nostdinc -I/usr/include -I../../../sys
+CFLAGS += -Werror -Wall # -nostdinc -I/usr/include -I../../../sys
CFLAGS += -Wextra
-LDFLAGS += -lpthread -lpcap
+LDFLAGS += -lpthread
+.ifdef WITHOUT_PCAP
+CFLAGS += -DNO_PCAP
+.else
+LDFLAGS += -lpcap
+.endif
.include <bsd.prog.mk>
.include <bsd.lib.mk>
all: $(PROGS)
-pkt-gen bridge: nm_util.o
- $(CC) $(CFLAGS) -o ${.TARGET} ${.TARGET:=.c} nm_util.o $(LDFLAGS)
+pkt-gen: pkt-gen.o
+ $(CC) $(CFLAGS) -o pkt-gen pkt-gen.o $(LDFLAGS)
-testpcap: pcap.c libnetmap.so
- $(CC) $(CFLAGS) -DTEST -L. -lnetmap -o ${.TARGET} pcap.c
-
-libnetmap.so: pcap.c nm_util.c
- $(CC) $(CFLAGS) -fpic -c ${.ALLSRC}
- $(CC) -shared -o ${.TARGET} ${.ALLSRC:.c=.o}
+bridge: bridge.o
+ $(CC) $(CFLAGS) -o bridge bridge.o
+
+vale-ctl: vale-ctl.o
+ $(CC) $(CFLAGS) -o vale-ctl vale-ctl.o
diff --git a/tools/tools/netmap/README b/tools/tools/netmap/README
index 2bde6f2..40378e6 100644
--- a/tools/tools/netmap/README
+++ b/tools/tools/netmap/README
@@ -6,19 +6,4 @@ This directory contains examples that use netmap
bridge a two-port jumper wire, also using the native API
- testpcap a jumper wire using libnetmap (or libpcap)
-
- click* various click examples
-
-------------------------------------------------------------
-Some performance data as of may 2012 for applications using libpcap.
-Throughput is generally in Mpps computed with the 64-byte frames,
-using 1 core on a 2.9GHz CPU and 10Gbit/s interface
-
-Libpcap version -- Application ---------------------
-BSD netmap
----------------------------------------------------
- 0.77 3.82 ports/trafshow (version 5)
- 0.94 7.7 net-mgmt/ipcad (ip accounting daemon)
- 0.9 5.0 net-mgmt/darkstat (ip accounting + graphing)
- 0.83 2.45 net-mgmt/iftop (curses traffic display)
+ vale-ctl the program to control VALE bridges
diff --git a/tools/tools/netmap/bridge.c b/tools/tools/netmap/bridge.c
index 0aca44d..0895d4e 100644
--- a/tools/tools/netmap/bridge.c
+++ b/tools/tools/netmap/bridge.c
@@ -1,5 +1,5 @@
/*
- * (C) 2011 Luigi Rizzo, Matteo Landi
+ * (C) 2011-2014 Luigi Rizzo, Matteo Landi
*
* BSD license
*
@@ -9,14 +9,15 @@
* $FreeBSD$
*/
-#include "nm_util.h"
-
+#include <stdio.h>
+#define NETMAP_WITH_LIBS
+#include <net/netmap_user.h>
+#include <sys/poll.h>
int verbose = 0;
-char *version = "$Id$";
-
static int do_abort = 0;
+static int zerocopy = 1; /* enable zerocopy if possible */
static void
sigint_h(int sig)
@@ -28,6 +29,26 @@ sigint_h(int sig)
/*
+ * how many packets on this set of queues ?
+ */
+int
+pkt_queued(struct nm_desc *d, int tx)
+{
+ u_int i, tot = 0;
+
+ if (tx) {
+ for (i = d->first_tx_ring; i <= d->last_tx_ring; i++) {
+ tot += nm_ring_space(NETMAP_TXRING(d->nifp, i));
+ }
+ } else {
+ for (i = d->first_rx_ring; i <= d->last_rx_ring; i++) {
+ tot += nm_ring_space(NETMAP_RXRING(d->nifp, i));
+ }
+ }
+ return tot;
+}
+
+/*
* move up to 'limit' pkts from rxring to txring swapping buffers.
*/
static int
@@ -42,20 +63,16 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
msg, rxring->flags, txring->flags);
j = rxring->cur; /* RX */
k = txring->cur; /* TX */
- if (rxring->avail < limit)
- limit = rxring->avail;
- if (txring->avail < limit)
- limit = txring->avail;
+ m = nm_ring_space(rxring);
+ if (m < limit)
+ limit = m;
+ m = nm_ring_space(txring);
+ if (m < limit)
+ limit = m;
m = limit;
while (limit-- > 0) {
struct netmap_slot *rs = &rxring->slot[j];
struct netmap_slot *ts = &txring->slot[k];
-#ifdef NO_SWAP
- char *rxbuf = NETMAP_BUF(rxring, rs->buf_idx);
- char *txbuf = NETMAP_BUF(txring, ts->buf_idx);
-#else
- uint32_t pkt;
-#endif
/* swap packets */
if (ts->buf_idx < 2 || rs->buf_idx < 2) {
@@ -63,31 +80,31 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
j, rs->buf_idx, k, ts->buf_idx);
sleep(2);
}
-#ifndef NO_SWAP
- pkt = ts->buf_idx;
- ts->buf_idx = rs->buf_idx;
- rs->buf_idx = pkt;
-#endif
/* copy the packet length. */
- if (rs->len < 14 || rs->len > 2048)
+ if (rs->len > 2048) {
D("wrong len %d rx[%d] -> tx[%d]", rs->len, j, k);
- else if (verbose > 1)
+ rs->len = 0;
+ } else if (verbose > 1) {
D("%s send len %d rx[%d] -> tx[%d]", msg, rs->len, j, k);
+ }
ts->len = rs->len;
-#ifdef NO_SWAP
- pkt_copy(rxbuf, txbuf, ts->len);
-#else
- /* report the buffer change. */
- ts->flags |= NS_BUF_CHANGED;
- rs->flags |= NS_BUF_CHANGED;
-#endif /* NO_SWAP */
- j = NETMAP_RING_NEXT(rxring, j);
- k = NETMAP_RING_NEXT(txring, k);
+ if (zerocopy) {
+ uint32_t pkt = ts->buf_idx;
+ ts->buf_idx = rs->buf_idx;
+ rs->buf_idx = pkt;
+ /* report the buffer change. */
+ ts->flags |= NS_BUF_CHANGED;
+ rs->flags |= NS_BUF_CHANGED;
+ } else {
+ char *rxbuf = NETMAP_BUF(rxring, rs->buf_idx);
+ char *txbuf = NETMAP_BUF(txring, ts->buf_idx);
+ nm_pkt_copy(rxbuf, txbuf, ts->len);
+ }
+ j = nm_ring_next(rxring, j);
+ k = nm_ring_next(txring, k);
}
- rxring->avail -= m;
- txring->avail -= m;
- rxring->cur = j;
- txring->cur = k;
+ rxring->head = rxring->cur = j;
+ txring->head = txring->cur = k;
if (verbose && m > 0)
D("%s sent %d packets to %p", msg, m, txring);
@@ -96,22 +113,22 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
/* move packts from src to destination */
static int
-move(struct my_ring *src, struct my_ring *dst, u_int limit)
+move(struct nm_desc *src, struct nm_desc *dst, u_int limit)
{
struct netmap_ring *txring, *rxring;
- u_int m = 0, si = src->begin, di = dst->begin;
- const char *msg = (src->queueid & NETMAP_SW_RING) ?
+ u_int m = 0, si = src->first_rx_ring, di = dst->first_tx_ring;
+ const char *msg = (src->req.nr_ringid & NETMAP_SW_RING) ?
"host->net" : "net->host";
- while (si < src->end && di < dst->end) {
+ while (si <= src->last_rx_ring && di <= dst->last_tx_ring) {
rxring = NETMAP_RXRING(src->nifp, si);
txring = NETMAP_TXRING(dst->nifp, di);
ND("txring %p rxring %p", txring, rxring);
- if (rxring->avail == 0) {
+ if (nm_ring_empty(rxring)) {
si++;
continue;
}
- if (txring->avail == 0) {
+ if (nm_ring_empty(txring)) {
di++;
continue;
}
@@ -121,28 +138,6 @@ move(struct my_ring *src, struct my_ring *dst, u_int limit)
return (m);
}
-/*
- * how many packets on this set of queues ?
- */
-static int
-pkt_queued(struct my_ring *me, int tx)
-{
- u_int i, tot = 0;
-
- ND("me %p begin %d end %d", me, me->begin, me->end);
- for (i = me->begin; i < me->end; i++) {
- struct netmap_ring *ring = tx ?
- NETMAP_TXRING(me->nifp, i) : NETMAP_RXRING(me->nifp, i);
- tot += ring->avail;
- }
- if (0 && verbose && tot && !tx)
- D("ring %s %s %s has %d avail at %d",
- me->ifname, tx ? "tx": "rx",
- me->end >= me->nifp->ni_tx_rings ? // XXX who comes first ?
- "host":"net",
- tot, NETMAP_TXRING(me->nifp, me->begin)->cur);
- return tot;
-}
static void
usage(void)
@@ -163,17 +158,16 @@ int
main(int argc, char **argv)
{
struct pollfd pollfd[2];
- int i, ch;
+ int ch;
u_int burst = 1024, wait_link = 4;
- struct my_ring me[2];
+ struct nm_desc *pa = NULL, *pb = NULL;
char *ifa = NULL, *ifb = NULL;
+ char ifabuf[64] = { 0 };
- fprintf(stderr, "%s %s built %s %s\n",
- argv[0], version, __DATE__, __TIME__);
-
- bzero(me, sizeof(me));
+ fprintf(stderr, "%s built %s %s\n",
+ argv[0], __DATE__, __TIME__);
- while ( (ch = getopt(argc, argv, "b:i:vw:")) != -1) {
+ while ( (ch = getopt(argc, argv, "b:ci:vw:")) != -1) {
switch (ch) {
default:
D("bad option %c %s", ch, optarg);
@@ -191,6 +185,9 @@ main(int argc, char **argv)
D("%s ignored, already have 2 interfaces",
optarg);
break;
+ case 'c':
+ zerocopy = 0; /* do not zerocopy */
+ break;
case 'v':
verbose++;
break;
@@ -224,34 +221,38 @@ main(int argc, char **argv)
D("invalid wait_link %d, set to 4", wait_link);
wait_link = 4;
}
- /* setup netmap interface #1. */
- me[0].ifname = ifa;
- me[1].ifname = ifb;
if (!strcmp(ifa, ifb)) {
D("same interface, endpoint 0 goes to host");
- i = NETMAP_SW_RING;
+ snprintf(ifabuf, sizeof(ifabuf) - 1, "%s^", ifa);
+ ifa = ifabuf;
} else {
/* two different interfaces. Take all rings on if1 */
- i = 0; // all hw rings
}
- if (netmap_open(me, i, 1))
+ pa = nm_open(ifa, NULL, 0, NULL);
+ if (pa == NULL) {
+ D("cannot open %s", ifa);
return (1);
- me[1].mem = me[0].mem; /* copy the pointer, so only one mmap */
- if (netmap_open(me+1, 0, 1))
+ }
+ // XXX use a single mmap ?
+ pb = nm_open(ifb, NULL, NM_OPEN_NO_MMAP, pa);
+ if (pb == NULL) {
+ D("cannot open %s", ifb);
+ nm_close(pa);
return (1);
+ }
+ zerocopy = zerocopy && (pa->mem == pb->mem);
+ D("------- zerocopy %ssupported", zerocopy ? "" : "NOT ");
/* setup poll(2) variables. */
memset(pollfd, 0, sizeof(pollfd));
- for (i = 0; i < 2; i++) {
- pollfd[i].fd = me[i].fd;
- pollfd[i].events = (POLLIN);
- }
+ pollfd[0].fd = pa->fd;
+ pollfd[1].fd = pb->fd;
D("Wait %d secs for link to come up...", wait_link);
sleep(wait_link);
D("Ready to go, %s 0x%x/%d <-> %s 0x%x/%d.",
- me[0].ifname, me[0].queueid, me[0].nifp->ni_rx_rings,
- me[1].ifname, me[1].queueid, me[1].nifp->ni_rx_rings);
+ pa->req.nr_name, pa->first_rx_ring, pa->req.nr_rx_rings,
+ pb->req.nr_name, pb->first_rx_ring, pb->req.nr_rx_rings);
/* main loop */
signal(SIGINT, sigint_h);
@@ -259,8 +260,8 @@ main(int argc, char **argv)
int n0, n1, ret;
pollfd[0].events = pollfd[1].events = 0;
pollfd[0].revents = pollfd[1].revents = 0;
- n0 = pkt_queued(me, 0);
- n1 = pkt_queued(me + 1, 0);
+ n0 = pkt_queued(pa, 0);
+ n1 = pkt_queued(pb, 0);
if (n0)
pollfd[1].events |= POLLOUT;
else
@@ -276,39 +277,41 @@ main(int argc, char **argv)
ret <= 0 ? "timeout" : "ok",
pollfd[0].events,
pollfd[0].revents,
- pkt_queued(me, 0),
- me[0].rx->cur,
- pkt_queued(me, 1),
+ pkt_queued(pa, 0),
+ NETMAP_RXRING(pa->nifp, pa->cur_rx_ring)->cur,
+ pkt_queued(pa, 1),
pollfd[1].events,
pollfd[1].revents,
- pkt_queued(me+1, 0),
- me[1].rx->cur,
- pkt_queued(me+1, 1)
+ pkt_queued(pb, 0),
+ NETMAP_RXRING(pb->nifp, pb->cur_rx_ring)->cur,
+ pkt_queued(pb, 1)
);
if (ret < 0)
continue;
if (pollfd[0].revents & POLLERR) {
- D("error on fd0, rxcur %d@%d",
- me[0].rx->avail, me[0].rx->cur);
+ struct netmap_ring *rx = NETMAP_RXRING(pa->nifp, pa->cur_rx_ring);
+ D("error on fd0, rx [%d,%d,%d)",
+ rx->head, rx->cur, rx->tail);
}
if (pollfd[1].revents & POLLERR) {
- D("error on fd1, rxcur %d@%d",
- me[1].rx->avail, me[1].rx->cur);
+ struct netmap_ring *rx = NETMAP_RXRING(pb->nifp, pb->cur_rx_ring);
+ D("error on fd1, rx [%d,%d,%d)",
+ rx->head, rx->cur, rx->tail);
}
if (pollfd[0].revents & POLLOUT) {
- move(me + 1, me, burst);
+ move(pb, pa, burst);
// XXX we don't need the ioctl */
// ioctl(me[0].fd, NIOCTXSYNC, NULL);
}
if (pollfd[1].revents & POLLOUT) {
- move(me, me + 1, burst);
+ move(pa, pb, burst);
// XXX we don't need the ioctl */
// ioctl(me[1].fd, NIOCTXSYNC, NULL);
}
}
D("exiting");
- netmap_close(me + 1);
- netmap_close(me + 0);
+ nm_close(pb);
+ nm_close(pa);
return (0);
}
diff --git a/tools/tools/netmap/click-test.cfg b/tools/tools/netmap/click-test.cfg
deleted file mode 100644
index fc5759f..0000000
--- a/tools/tools/netmap/click-test.cfg
+++ /dev/null
@@ -1,19 +0,0 @@
-//
-// $FreeBSD$
-//
-// A sample test configuration for click
-//
-//
-// create a switch
-
-myswitch :: EtherSwitch;
-
-// two input devices
-
-c0 :: FromDevice(ix0, PROMISC true);
-c1 :: FromDevice(ix1, PROMISC true);
-
-// and now pass packets around
-
-c0[0] -> [0]sw[0] -> Queue(10000) -> ToDevice(ix0);
-c1[0] -> [1]sw[1] -> Queue(10000) -> ToDevice(ix1);
diff --git a/tools/tools/netmap/nm_util.c b/tools/tools/netmap/nm_util.c
deleted file mode 100644
index 6153603..0000000
--- a/tools/tools/netmap/nm_util.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (C) 2012 Luigi Rizzo. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- * $Id$
- *
- * utilities to use netmap devices.
- * This does the basic functions of opening a device and issuing
- * ioctls()
- */
-
-#include "nm_util.h"
-
-extern int verbose;
-
-int
-nm_do_ioctl(struct my_ring *me, u_long what, int subcmd)
-{
- struct ifreq ifr;
- int error;
-#if defined( __FreeBSD__ ) || defined (__APPLE__)
- int fd = me->fd;
-#endif
-#ifdef linux
- struct ethtool_value eval;
- int fd;
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0) {
- printf("Error: cannot get device control socket.\n");
- return -1;
- }
-#endif /* linux */
-
- (void)subcmd; // unused
- bzero(&ifr, sizeof(ifr));
- strncpy(ifr.ifr_name, me->ifname, sizeof(ifr.ifr_name));
- switch (what) {
- case SIOCSIFFLAGS:
-#ifndef __APPLE__
- ifr.ifr_flagshigh = me->if_flags >> 16;
-#endif
- ifr.ifr_flags = me->if_flags & 0xffff;
- break;
-
-#if defined( __FreeBSD__ )
- case SIOCSIFCAP:
- ifr.ifr_reqcap = me->if_reqcap;
- ifr.ifr_curcap = me->if_curcap;
- break;
-#endif
-#ifdef linux
- case SIOCETHTOOL:
- eval.cmd = subcmd;
- eval.data = 0;
- ifr.ifr_data = (caddr_t)&eval;
- break;
-#endif /* linux */
- }
- error = ioctl(fd, what, &ifr);
- if (error)
- goto done;
- switch (what) {
- case SIOCGIFFLAGS:
-#ifndef __APPLE__
- me->if_flags = (ifr.ifr_flagshigh << 16) |
- (0xffff & ifr.ifr_flags);
-#endif
- if (verbose)
- D("flags are 0x%x", me->if_flags);
- break;
-
-#if defined( __FreeBSD__ )
- case SIOCGIFCAP:
- me->if_reqcap = ifr.ifr_reqcap;
- me->if_curcap = ifr.ifr_curcap;
- if (verbose)
- D("curcap are 0x%x", me->if_curcap);
- break;
-#endif /* __FreeBSD__ */
- }
-done:
-#ifdef linux
- close(fd);
-#endif
- if (error)
- D("ioctl error %d %lu", error, what);
- return error;
-}
-
-/*
- * open a device. if me->mem is null then do an mmap.
- * Returns the file descriptor.
- * The extra flag checks configures promisc mode.
- */
-int
-netmap_open(struct my_ring *me, int ringid, int promisc)
-{
- int fd, err, l;
- struct nmreq req;
-
- me->fd = fd = open("/dev/netmap", O_RDWR);
- if (fd < 0) {
- D("Unable to open /dev/netmap");
- return (-1);
- }
- bzero(&req, sizeof(req));
- req.nr_version = NETMAP_API;
- strncpy(req.nr_name, me->ifname, sizeof(req.nr_name));
- req.nr_ringid = ringid;
- err = ioctl(fd, NIOCGINFO, &req);
- if (err) {
- D("cannot get info on %s, errno %d ver %d",
- me->ifname, errno, req.nr_version);
- goto error;
- }
- me->memsize = l = req.nr_memsize;
- if (verbose)
- D("memsize is %d MB", l>>20);
- err = ioctl(fd, NIOCREGIF, &req);
- if (err) {
- D("Unable to register %s", me->ifname);
- goto error;
- }
-
- if (me->mem == NULL) {
- me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
- if (me->mem == MAP_FAILED) {
- D("Unable to mmap");
- me->mem = NULL;
- goto error;
- }
- }
-
-
- /* Set the operating mode. */
- if (ringid != NETMAP_SW_RING) {
- nm_do_ioctl(me, SIOCGIFFLAGS, 0);
- if ((me[0].if_flags & IFF_UP) == 0) {
- D("%s is down, bringing up...", me[0].ifname);
- me[0].if_flags |= IFF_UP;
- }
- if (promisc) {
- me[0].if_flags |= IFF_PPROMISC;
- nm_do_ioctl(me, SIOCSIFFLAGS, 0);
- }
-
-#ifdef __FreeBSD__
- /* also disable checksums etc. */
- nm_do_ioctl(me, SIOCGIFCAP, 0);
- me[0].if_reqcap = me[0].if_curcap;
- me[0].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE);
- nm_do_ioctl(me+0, SIOCSIFCAP, 0);
-#endif
-#ifdef linux
- /* disable:
- * - generic-segmentation-offload
- * - tcp-segmentation-offload
- * - rx-checksumming
- * - tx-checksumming
- * XXX check how to set back the caps.
- */
- nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_SGSO);
- nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_STSO);
- nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_SRXCSUM);
- nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_STXCSUM);
-#endif /* linux */
- }
-
- me->nifp = NETMAP_IF(me->mem, req.nr_offset);
- me->queueid = ringid;
- if (ringid & NETMAP_SW_RING) {
- me->begin = req.nr_rx_rings;
- me->end = me->begin + 1;
- me->tx = NETMAP_TXRING(me->nifp, req.nr_tx_rings);
- me->rx = NETMAP_RXRING(me->nifp, req.nr_rx_rings);
- } else if (ringid & NETMAP_HW_RING) {
- D("XXX check multiple threads");
- me->begin = ringid & NETMAP_RING_MASK;
- me->end = me->begin + 1;
- me->tx = NETMAP_TXRING(me->nifp, me->begin);
- me->rx = NETMAP_RXRING(me->nifp, me->begin);
- } else {
- me->begin = 0;
- me->end = req.nr_rx_rings; // XXX max of the two
- me->tx = NETMAP_TXRING(me->nifp, 0);
- me->rx = NETMAP_RXRING(me->nifp, 0);
- }
- return (0);
-error:
- close(me->fd);
- return -1;
-}
-
-
-int
-netmap_close(struct my_ring *me)
-{
- D("");
- if (me->mem)
- munmap(me->mem, me->memsize);
- close(me->fd);
- return (0);
-}
-
-
-/*
- * how many packets on this set of queues ?
- */
-int
-pkt_queued(struct my_ring *me, int tx)
-{
- u_int i, tot = 0;
-
- ND("me %p begin %d end %d", me, me->begin, me->end);
- for (i = me->begin; i < me->end; i++) {
- struct netmap_ring *ring = tx ?
- NETMAP_TXRING(me->nifp, i) : NETMAP_RXRING(me->nifp, i);
- tot += ring->avail;
- }
- if (0 && verbose && tot && !tx)
- D("ring %s %s %s has %d avail at %d",
- me->ifname, tx ? "tx": "rx",
- me->end >= me->nifp->ni_tx_rings ? // XXX who comes first ?
- "host":"net",
- tot, NETMAP_TXRING(me->nifp, me->begin)->cur);
- return tot;
-}
diff --git a/tools/tools/netmap/nm_util.h b/tools/tools/netmap/nm_util.h
deleted file mode 100644
index 0d64f13..0000000
--- a/tools/tools/netmap/nm_util.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (C) 2012 Luigi Rizzo. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- * $Id$
- *
- * Some utilities to build netmap-based programs.
- */
-
-#ifndef _NM_UTIL_H
-#define _NM_UTIL_H
-#include <errno.h>
-#include <signal.h> /* signal */
-#include <stdlib.h>
-#include <stdio.h>
-#include <inttypes.h> /* PRI* macros */
-#include <string.h> /* strcmp */
-#include <fcntl.h> /* open */
-#include <unistd.h> /* close */
-#include <ifaddrs.h> /* getifaddrs */
-
-#include <sys/mman.h> /* PROT_* */
-#include <sys/ioctl.h> /* ioctl */
-#include <sys/poll.h>
-#include <sys/socket.h> /* sockaddr.. */
-#include <arpa/inet.h> /* ntohs */
-#include <sys/param.h>
-#include <sys/sysctl.h> /* sysctl */
-#include <sys/time.h> /* timersub */
-
-#include <net/ethernet.h>
-#include <net/if.h> /* ifreq */
-
-#include <netinet/in.h>
-#include <netinet/ip.h>
-#include <netinet/udp.h>
-
-#include <net/netmap.h>
-#include <net/netmap_user.h>
-
-#ifndef MY_PCAP /* use the system's pcap if available */
-
-#ifdef NO_PCAP
-#define PCAP_ERRBUF_SIZE 512
-typedef void pcap_t;
-struct pcap_pkthdr;
-#define pcap_inject(a,b,c) ((void)a, (void)b, (void)c, -1)
-#define pcap_dispatch(a, b, c, d) (void)c
-#define pcap_open_live(a, b, c, d, e) ((void)e, NULL)
-#else /* !NO_PCAP */
-#include <pcap/pcap.h> // XXX do we need it ?
-#endif /* !NO_PCAP */
-
-#endif // XXX hack
-
-#include <pthread.h> /* pthread_* */
-
-#ifdef linux
-#define ifr_flagshigh ifr_flags
-#define ifr_curcap ifr_flags
-#define ifr_reqcap ifr_flags
-#define IFF_PPROMISC IFF_PROMISC
-#include <linux/ethtool.h>
-#include <linux/sockios.h>
-
-#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
-#include <netinet/ether.h> /* ether_aton */
-#include <linux/if_packet.h> /* sockaddr_ll */
-#endif /* linux */
-
-#ifdef __FreeBSD__
-#include <sys/endian.h> /* le64toh */
-#include <machine/param.h>
-
-#include <pthread_np.h> /* pthread w/ affinity */
-#include <sys/cpuset.h> /* cpu_set */
-#include <net/if_dl.h> /* LLADDR */
-#endif /* __FreeBSD__ */
-
-#ifdef __APPLE__
-#define ifr_flagshigh ifr_flags // XXX
-#define IFF_PPROMISC IFF_PROMISC
-#include <net/if_dl.h> /* LLADDR */
-#define clock_gettime(a,b) \
- do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
-#endif /* __APPLE__ */
-
-static inline int min(int a, int b) { return a < b ? a : b; }
-extern int time_second;
-
-/* debug support */
-#define ND(format, ...) do {} while(0)
-#define D(format, ...) \
- fprintf(stderr, "%s [%d] " format "\n", \
- __FUNCTION__, __LINE__, ##__VA_ARGS__)
-
-#define RD(lps, format, ...) \
- do { \
- static int t0, cnt; \
- if (t0 != time_second) { \
- t0 = time_second; \
- cnt = 0; \
- } \
- if (cnt++ < lps) \
- D(format, ##__VA_ARGS__); \
- } while (0)
-
-
-
-// XXX does it work on 32-bit machines ?
-static inline void prefetch (const void *x)
-{
- __asm volatile("prefetcht0 %0" :: "m" (*(const unsigned long *)x));
-}
-
-// XXX only for multiples of 64 bytes, non overlapped.
-static inline void
-pkt_copy(const void *_src, void *_dst, int l)
-{
- const uint64_t *src = _src;
- uint64_t *dst = _dst;
-#define likely(x) __builtin_expect(!!(x), 1)
-#define unlikely(x) __builtin_expect(!!(x), 0)
- if (unlikely(l >= 1024)) {
- bcopy(src, dst, l);
- return;
- }
- for (; l > 0; l-=64) {
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- }
-}
-
-/*
- * info on a ring we handle
- */
-struct my_ring {
- const char *ifname;
- int fd;
- char *mem; /* userspace mmap address */
- u_int memsize;
- u_int queueid;
- u_int begin, end; /* first..last+1 rings to check */
- struct netmap_if *nifp;
- struct netmap_ring *tx, *rx; /* shortcuts */
-
- uint32_t if_flags;
- uint32_t if_reqcap;
- uint32_t if_curcap;
-};
-int netmap_open(struct my_ring *me, int ringid, int promisc);
-int netmap_close(struct my_ring *me);
-int nm_do_ioctl(struct my_ring *me, u_long what, int subcmd);
-#endif /* _NM_UTIL_H */
diff --git a/tools/tools/netmap/pcap.c b/tools/tools/netmap/pcap.c
deleted file mode 100644
index f30f57b..0000000
--- a/tools/tools/netmap/pcap.c
+++ /dev/null
@@ -1,654 +0,0 @@
-/*
- * (C) 2011-2012 Luigi Rizzo
- *
- * BSD license
- *
- * A simple library that maps some pcap functions onto netmap
- * This is not 100% complete but enough to let tcpdump, trafshow
- * and other apps work.
- *
- * $FreeBSD$
- */
-
-#define MY_PCAP
-#include "nm_util.h"
-
-char *version = "$Id$";
-int verbose = 0;
-
-/*
- * We redefine here a number of structures that are in pcap.h
- * so we can compile this file without the system header.
- */
-#ifndef PCAP_ERRBUF_SIZE
-#define PCAP_ERRBUF_SIZE 128
-/*
- * Each packet is accompanied by a header including the timestamp,
- * captured size and actual size.
- */
-struct pcap_pkthdr {
- struct timeval ts; /* time stamp */
- uint32_t caplen; /* length of portion present */
- uint32_t len; /* length this packet (off wire) */
-};
-
-typedef struct pcap_if pcap_if_t;
-
-/*
- * Representation of an interface address.
- */
-struct pcap_addr {
- struct pcap_addr *next;
- struct sockaddr *addr; /* address */
- struct sockaddr *netmask; /* netmask for the above */
- struct sockaddr *broadaddr; /* broadcast addr for the above */
- struct sockaddr *dstaddr; /* P2P dest. address for the above */
-};
-
-struct pcap_if {
- struct pcap_if *next;
- char *name; /* name to hand to "pcap_open_live()" */
- char *description; /* textual description of interface, or NULL */
- struct pcap_addr *addresses;
- uint32_t flags; /* PCAP_IF_ interface flags */
-};
-
-/*
- * We do not support stats (yet)
- */
-struct pcap_stat {
- u_int ps_recv; /* number of packets received */
- u_int ps_drop; /* number of packets dropped */
- u_int ps_ifdrop; /* drops by interface XXX not yet supported */
-#ifdef WIN32
- u_int bs_capt; /* number of packets that reach the app. */
-#endif /* WIN32 */
-};
-
-typedef void pcap_t;
-typedef enum {
- PCAP_D_INOUT = 0,
- PCAP_D_IN,
- PCAP_D_OUT
-} pcap_direction_t;
-
-
-
-typedef void (*pcap_handler)(u_char *user,
- const struct pcap_pkthdr *h, const u_char *bytes);
-
-char errbuf[PCAP_ERRBUF_SIZE];
-
-pcap_t *pcap_open_live(const char *device, int snaplen,
- int promisc, int to_ms, char *errbuf);
-
-int pcap_findalldevs(pcap_if_t **alldevsp, char *errbuf);
-void pcap_close(pcap_t *p);
-int pcap_get_selectable_fd(pcap_t *p);
-int pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user);
-int pcap_setnonblock(pcap_t *p, int nonblock, char *errbuf);
-int pcap_setdirection(pcap_t *p, pcap_direction_t d);
-char *pcap_lookupdev(char *errbuf);
-int pcap_inject(pcap_t *p, const void *buf, size_t size);
-int pcap_fileno(pcap_t *p);
-const char *pcap_lib_version(void);
-
-
-struct eproto {
- const char *s;
- u_short p;
-};
-#endif /* !PCAP_ERRBUF_SIZE */
-
-#ifndef TEST
-/*
- * build as a shared library
- */
-
-char pcap_version[] = "libnetmap version 0.3";
-
-/*
- * Our equivalent of pcap_t
- */
-struct pcap_ring {
- struct my_ring me;
-#if 0
- const char *ifname;
-
- //struct nmreq nmr;
-
- int fd;
- char *mem; /* userspace mmap address */
- u_int memsize;
- u_int queueid;
- u_int begin, end; /* first..last+1 rings to check */
- struct netmap_if *nifp;
-
- uint32_t if_flags;
- uint32_t if_reqcap;
- uint32_t if_curcap;
-#endif
- int snaplen;
- char *errbuf;
- int promisc;
- int to_ms;
-
- struct pcap_pkthdr hdr;
-
-
- struct pcap_stat st;
-
- char msg[PCAP_ERRBUF_SIZE];
-};
-
-
-
-/*
- * There is a set of functions that tcpdump expects even if probably
- * not used
- */
-struct eproto eproto_db[] = {
- { "ip", ETHERTYPE_IP },
- { "arp", ETHERTYPE_ARP },
- { (char *)0, 0 }
-};
-
-
-const char *pcap_lib_version(void)
-{
- return pcap_version;
-}
-
-int
-pcap_findalldevs(pcap_if_t **alldevsp, char *errbuf)
-{
- pcap_if_t *top = NULL;
-#ifndef linux
- struct ifaddrs *i_head, *i;
- pcap_if_t *cur;
- struct pcap_addr *tail = NULL;
- int l;
-
- D("listing all devs");
- *alldevsp = NULL;
- i_head = NULL;
-
- if (getifaddrs(&i_head)) {
- D("cannot get if addresses");
- return -1;
- }
- for (i = i_head; i; i = i->ifa_next) {
- //struct ifaddrs *ifa;
- struct pcap_addr *pca;
- //struct sockaddr *sa;
-
- D("got interface %s", i->ifa_name);
- if (!top || strcmp(top->name, i->ifa_name)) {
- /* new interface */
- l = sizeof(*top) + strlen(i->ifa_name) + 1;
- cur = calloc(1, l);
- if (cur == NULL) {
- D("no space for if descriptor");
- continue;
- }
- cur->name = (char *)(cur + 1);
- //cur->flags = i->ifa_flags;
- strcpy(cur->name, i->ifa_name);
- cur->description = NULL;
- cur->next = top;
- top = cur;
- tail = NULL;
- }
- /* now deal with addresses */
- D("%s addr family %d len %d %s %s",
- top->name,
- i->ifa_addr->sa_family, i->ifa_addr->sa_len,
- i->ifa_netmask ? "Netmask" : "",
- i->ifa_broadaddr ? "Broadcast" : "");
- l = sizeof(struct pcap_addr) +
- (i->ifa_addr ? i->ifa_addr->sa_len:0) +
- (i->ifa_netmask ? i->ifa_netmask->sa_len:0) +
- (i->ifa_broadaddr? i->ifa_broadaddr->sa_len:0);
- pca = calloc(1, l);
- if (pca == NULL) {
- D("no space for if addr");
- continue;
- }
-#define SA_NEXT(x) ((struct sockaddr *)((char *)(x) + (x)->sa_len))
- pca->addr = (struct sockaddr *)(pca + 1);
- pkt_copy(i->ifa_addr, pca->addr, i->ifa_addr->sa_len);
- if (i->ifa_netmask) {
- pca->netmask = SA_NEXT(pca->addr);
- bcopy(i->ifa_netmask, pca->netmask, i->ifa_netmask->sa_len);
- if (i->ifa_broadaddr) {
- pca->broadaddr = SA_NEXT(pca->netmask);
- bcopy(i->ifa_broadaddr, pca->broadaddr, i->ifa_broadaddr->sa_len);
- }
- }
- if (tail == NULL) {
- top->addresses = pca;
- } else {
- tail->next = pca;
- }
- tail = pca;
-
- }
- freeifaddrs(i_head);
-#endif /* !linux */
- (void)errbuf; /* UNUSED */
- *alldevsp = top;
- return 0;
-}
-
-void pcap_freealldevs(pcap_if_t *alldevs)
-{
- (void)alldevs; /* UNUSED */
- D("unimplemented");
-}
-
-char *
-pcap_lookupdev(char *buf)
-{
- D("%s", buf);
- strcpy(buf, "/dev/netmap");
- return buf;
-}
-
-pcap_t *
-pcap_create(const char *source, char *errbuf)
-{
- D("src %s (call open liveted)", source);
- return pcap_open_live(source, 0, 1, 100, errbuf);
-}
-
-int
-pcap_activate(pcap_t *p)
-{
- D("pcap %p running", p);
- return 0;
-}
-
-int
-pcap_can_set_rfmon(pcap_t *p)
-{
- (void)p; /* UNUSED */
- D("");
- return 0; /* no we can't */
-}
-
-int
-pcap_set_snaplen(pcap_t *p, int snaplen)
-{
- struct pcap_ring *me = p;
-
- D("len %d", snaplen);
- me->snaplen = snaplen;
- return 0;
-}
-
-int
-pcap_snapshot(pcap_t *p)
-{
- struct pcap_ring *me = p;
-
- D("len %d", me->snaplen);
- return me->snaplen;
-}
-
-int
-pcap_lookupnet(const char *device, uint32_t *netp,
- uint32_t *maskp, char *errbuf)
-{
-
- (void)errbuf; /* UNUSED */
- D("device %s", device);
- inet_aton("10.0.0.255", (struct in_addr *)netp);
- inet_aton("255.255.255.0",(struct in_addr *) maskp);
- return 0;
-}
-
-int
-pcap_set_promisc(pcap_t *p, int promisc)
-{
- struct pcap_ring *me = p;
-
- D("promisc %d", promisc);
- if (nm_do_ioctl(&me->me, SIOCGIFFLAGS, 0))
- D("SIOCGIFFLAGS failed");
- if (promisc) {
- me->me.if_flags |= IFF_PPROMISC;
- } else {
- me->me.if_flags &= ~IFF_PPROMISC;
- }
- if (nm_do_ioctl(&me->me, SIOCSIFFLAGS, 0))
- D("SIOCSIFFLAGS failed");
- return 0;
-}
-
-int
-pcap_set_timeout(pcap_t *p, int to_ms)
-{
- struct pcap_ring *me = p;
-
- D("%d ms", to_ms);
- me->to_ms = to_ms;
- return 0;
-}
-
-struct bpf_program;
-
-int
-pcap_compile(pcap_t *p, struct bpf_program *fp,
- const char *str, int optimize, uint32_t netmask)
-{
- (void)p; /* UNUSED */
- (void)fp; /* UNUSED */
- (void)optimize; /* UNUSED */
- (void)netmask; /* UNUSED */
- D("%s", str);
- return 0;
-}
-
-int
-pcap_setfilter(pcap_t *p, struct bpf_program *fp)
-{
- (void)p; /* UNUSED */
- (void)fp; /* UNUSED */
- D("");
- return 0;
-}
-
-int
-pcap_datalink(pcap_t *p)
-{
- (void)p; /* UNUSED */
- D("returns 1");
- return 1; // ethernet
-}
-
-const char *
-pcap_datalink_val_to_name(int dlt)
-{
- D("%d returns DLT_EN10MB", dlt);
- return "DLT_EN10MB";
-}
-
-const char *
-pcap_datalink_val_to_description(int dlt)
-{
- D("%d returns Ethernet link", dlt);
- return "Ethernet link";
-}
-
-struct pcap_stat;
-int
-pcap_stats(pcap_t *p, struct pcap_stat *ps)
-{
- struct pcap_ring *me = p;
- ND("");
-
- *ps = me->st;
- return 0; /* accumulate from pcap_dispatch() */
-};
-
-char *
-pcap_geterr(pcap_t *p)
-{
- struct pcap_ring *me = p;
-
- D("");
- return me->msg;
-}
-
-pcap_t *
-pcap_open_live(const char *device, int snaplen,
- int promisc, int to_ms, char *errbuf)
-{
- struct pcap_ring *me;
- int l;
-
- (void)snaplen; /* UNUSED */
- (void)errbuf; /* UNUSED */
- if (!device) {
- D("missing device name");
- return NULL;
- }
-
- l = strlen(device) + 1;
- D("request to open %s snaplen %d promisc %d timeout %dms",
- device, snaplen, promisc, to_ms);
- me = calloc(1, sizeof(*me) + l);
- if (me == NULL) {
- D("failed to allocate struct for %s", device);
- return NULL;
- }
- me->me.ifname = (char *)(me + 1);
- strcpy((char *)me->me.ifname, device);
- if (netmap_open(&me->me, 0, promisc)) {
- D("error opening %s", device);
- free(me);
- return NULL;
- }
- me->to_ms = to_ms;
-
- return (pcap_t *)me;
-}
-
-void
-pcap_close(pcap_t *p)
-{
- struct my_ring *me = p;
-
- D("");
- if (!me)
- return;
- if (me->mem)
- munmap(me->mem, me->memsize);
- /* restore original flags ? */
- close(me->fd);
- bzero(me, sizeof(*me));
- free(me);
-}
-
-int
-pcap_fileno(pcap_t *p)
-{
- struct my_ring *me = p;
- D("returns %d", me->fd);
- return me->fd;
-}
-
-int
-pcap_get_selectable_fd(pcap_t *p)
-{
- struct my_ring *me = p;
-
- ND("");
- return me->fd;
-}
-
-int
-pcap_setnonblock(pcap_t *p, int nonblock, char *errbuf)
-{
- (void)p; /* UNUSED */
- (void)errbuf; /* UNUSED */
- D("mode is %d", nonblock);
- return 0; /* ignore */
-}
-
-int
-pcap_setdirection(pcap_t *p, pcap_direction_t d)
-{
- (void)p; /* UNUSED */
- (void)d; /* UNUSED */
- D("");
- return 0; /* ignore */
-};
-
-int
-pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
-{
- struct pcap_ring *pme = p;
- struct my_ring *me = &pme->me;
- int got = 0;
- u_int si;
-
- ND("cnt %d", cnt);
- if (cnt == 0)
- cnt = -1;
- /* scan all rings */
- for (si = me->begin; si < me->end; si++) {
- struct netmap_ring *ring = NETMAP_RXRING(me->nifp, si);
- ND("ring has %d pkts", ring->avail);
- if (ring->avail == 0)
- continue;
- pme->hdr.ts = ring->ts;
- /*
- * XXX a proper prefetch should be done as
- * prefetch(i); callback(i-1); ...
- */
- while ((cnt == -1 || cnt != got) && ring->avail > 0) {
- u_int i = ring->cur;
- u_int idx = ring->slot[i].buf_idx;
- if (idx < 2) {
- D("%s bogus RX index %d at offset %d",
- me->nifp->ni_name, idx, i);
- sleep(2);
- }
- u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
- prefetch(buf);
- pme->hdr.len = pme->hdr.caplen = ring->slot[i].len;
- // D("call %p len %d", p, me->hdr.len);
- callback(user, &pme->hdr, buf);
- ring->cur = NETMAP_RING_NEXT(ring, i);
- ring->avail--;
- got++;
- }
- }
- pme->st.ps_recv += got;
- return got;
-}
-
-int
-pcap_inject(pcap_t *p, const void *buf, size_t size)
-{
- struct my_ring *me = p;
- u_int si;
-
- ND("cnt %d", cnt);
- /* scan all rings */
- for (si = me->begin; si < me->end; si++) {
- struct netmap_ring *ring = NETMAP_TXRING(me->nifp, si);
-
- ND("ring has %d pkts", ring->avail);
- if (ring->avail == 0)
- continue;
- u_int i = ring->cur;
- u_int idx = ring->slot[i].buf_idx;
- if (idx < 2) {
- D("%s bogus TX index %d at offset %d",
- me->nifp->ni_name, idx, i);
- sleep(2);
- }
- u_char *dst = (u_char *)NETMAP_BUF(ring, idx);
- ring->slot[i].len = size;
- pkt_copy(buf, dst, size);
- ring->cur = NETMAP_RING_NEXT(ring, i);
- ring->avail--;
- // if (ring->avail == 0) ioctl(me->fd, NIOCTXSYNC, NULL);
- return size;
- }
- errno = ENOBUFS;
- return -1;
-}
-
-int
-pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
-{
- struct pcap_ring *me = p;
- struct pollfd fds[1];
- int i;
-
- ND("cnt %d", cnt);
- memset(fds, 0, sizeof(fds));
- fds[0].fd = me->me.fd;
- fds[0].events = (POLLIN);
-
- while (cnt == -1 || cnt > 0) {
- if (poll(fds, 1, me->to_ms) <= 0) {
- D("poll error/timeout");
- continue;
- }
- i = pcap_dispatch(p, cnt, callback, user);
- if (cnt > 0)
- cnt -= i;
- }
- return 0;
-}
-
-#endif /* !TEST */
-
-#ifdef TEST /* build test code */
-void do_send(u_char *user, const struct pcap_pkthdr *h, const u_char *buf)
-{
- pcap_inject((pcap_t *)user, buf, h->caplen);
-}
-
-/*
- * a simple pcap test program, bridge between two interfaces.
- */
-int
-main(int argc, char **argv)
-{
- pcap_t *p0, *p1;
- int burst = 1024;
- struct pollfd pollfd[2];
-
- fprintf(stderr, "%s %s built %s %s\n",
- argv[0], version, __DATE__, __TIME__);
-
- while (argc > 1 && !strcmp(argv[1], "-v")) {
- verbose++;
- argv++;
- argc--;
- }
-
- if (argc < 3 || argc > 4 || !strcmp(argv[1], argv[2])) {
- D("Usage: %s IFNAME1 IFNAME2 [BURST]", argv[0]);
- return (1);
- }
- if (argc > 3)
- burst = atoi(argv[3]);
-
- p0 = pcap_open_live(argv[1], 0, 1, 100, NULL);
- p1 = pcap_open_live(argv[2], 0, 1, 100, NULL);
- D("%s", version);
- D("open returns %p %p", p0, p1);
- if (!p0 || !p1)
- return(1);
- bzero(pollfd, sizeof(pollfd));
- pollfd[0].fd = pcap_fileno(p0);
- pollfd[1].fd = pcap_fileno(p1);
- pollfd[0].events = pollfd[1].events = POLLIN;
- for (;;) {
- /* do i need to reset ? */
- pollfd[0].revents = pollfd[1].revents = 0;
- int ret = poll(pollfd, 2, 1000);
- if (ret <= 0 || verbose)
- D("poll %s [0] ev %x %x [1] ev %x %x",
- ret <= 0 ? "timeout" : "ok",
- pollfd[0].events,
- pollfd[0].revents,
- pollfd[1].events,
- pollfd[1].revents);
- if (ret < 0)
- continue;
- if (pollfd[0].revents & POLLIN)
- pcap_dispatch(p0, burst, do_send, p1);
- if (pollfd[1].revents & POLLIN)
- pcap_dispatch(p1, burst, do_send, p0);
- }
-
- return (0);
-}
-#endif /* TEST */
diff --git a/tools/tools/netmap/pkt-gen.c b/tools/tools/netmap/pkt-gen.c
index 901175e..8e78fa8 100644
--- a/tools/tools/netmap/pkt-gen.c
+++ b/tools/tools/netmap/pkt-gen.c
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -25,7 +26,7 @@
/*
* $FreeBSD$
- * $Id$
+ * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $
*
* Example program to show how to build a multithreaded packet
* source/sink using the netmap device.
@@ -36,20 +37,94 @@
*
*/
-#include "nm_util.h"
+#define _GNU_SOURCE /* for CPU_SET() */
+#include <stdio.h>
+#define NETMAP_WITH_LIBS
+#include <net/netmap_user.h>
+
#include <ctype.h> // isprint()
+#include <unistd.h> // sysconf()
+#include <sys/poll.h>
+#include <arpa/inet.h> /* ntohs */
+#include <sys/sysctl.h> /* sysctl */
+#include <ifaddrs.h> /* getifaddrs */
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+
+#include <pthread.h>
+
+#ifndef NO_PCAP
+#include <pcap/pcap.h>
+#endif
+
+#ifdef linux
+
+#define cpuset_t cpu_set_t
+
+#define ifr_flagshigh ifr_flags /* only the low 16 bits here */
+#define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+
+#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
+#include <netinet/ether.h> /* ether_aton */
+#include <linux/if_packet.h> /* sockaddr_ll */
+#endif /* linux */
-const char *default_payload="netmap pkt-gen payload\n"
+#ifdef __FreeBSD__
+#include <sys/endian.h> /* le64toh */
+#include <machine/param.h>
+
+#include <pthread_np.h> /* pthread w/ affinity */
+#include <sys/cpuset.h> /* cpu_set */
+#include <net/if_dl.h> /* LLADDR */
+#endif /* __FreeBSD__ */
+
+#ifdef __APPLE__
+
+#define cpuset_t uint64_t // XXX
+/* Minimal stand-ins for the cpuset macros: one bit per cpu in a 64-bit mask. */
+static inline void CPU_ZERO(cpuset_t *p)
+{
+	*p = 0;
+}
+
+/*
+ * Set the bit for cpu i (taken modulo 64). The constant is widened to
+ * cpuset_t before shifting: a plain int 1 shifted by 31 or more bits
+ * is undefined behaviour and cannot reach the upper half of the mask.
+ */
+static inline void CPU_SET(uint32_t i, cpuset_t *p)
+{
+	*p |= (cpuset_t)1 << (i & 0x3f);
+}
+
+#define pthread_setaffinity_np(a, b, c) ((void)a, 0)
+
+#define ifr_flagshigh ifr_flags // XXX
+#define IFF_PPROMISC IFF_PROMISC
+#include <net/if_dl.h> /* LLADDR */
+#define clock_gettime(a,b) \
+ do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
+#endif /* __APPLE__ */
+
+const char *default_payload="netmap pkt-gen DIRECT payload\n"
"http://info.iet.unipi.it/~luigi/netmap/ ";
-int time_second; // support for RD() debugging macro
+const char *indirect_payload="netmap pkt-gen indirect payload\n"
+ "http://info.iet.unipi.it/~luigi/netmap/ ";
int verbose = 0;
-#define SKIP_PAYLOAD 1 /* do not check payload. */
+#define SKIP_PAYLOAD 1 /* do not check payload. XXX unused */
+
+
+#define VIRT_HDR_1 10 /* length of a base vnet-hdr */
+#define VIRT_HDR_2 12 /* length of the extended vnet-hdr */
+#define VIRT_HDR_MAX VIRT_HDR_2
+struct virt_header {
+ uint8_t fields[VIRT_HDR_MAX];
+};
struct pkt {
+ struct virt_header vh;
struct ether_header eh;
struct ip ip;
struct udphdr udp;
@@ -58,8 +133,8 @@ struct pkt {
struct ip_range {
char *name;
- struct in_addr start, end, cur;
- uint16_t port0, port1, cur_p;
+ uint32_t start, end; /* same as struct in_addr */
+ uint16_t port0, port1;
};
struct mac_range {
@@ -67,6 +142,8 @@ struct mac_range {
struct ether_addr start, end;
};
+/* ifname can be netmap:foo-xxxx */
+#define MAX_IFNAMELEN 64 /* our buffer for ifname */
/*
* global arguments for all threads
*/
@@ -80,6 +157,7 @@ struct glob_arg {
int burst;
int forever;
int npackets; /* total packets to send */
+ int frags; /* fragments per packet */
int nthreads;
int cpus;
int options; /* testing */
@@ -91,18 +169,25 @@ struct glob_arg {
#define OPT_INDIRECT 32 /* use indirect buffers, tx only */
#define OPT_DUMP 64 /* dump rx/tx traffic */
int dev_type;
+#ifndef NO_PCAP
pcap_t *p;
+#endif
int tx_rate;
struct timespec tx_period;
int affinity;
int main_fd;
- int report_interval;
+ struct nm_desc *nmd;
+ uint64_t nmd_flags;
+ int report_interval; /* milliseconds between prints */
void *(*td_body)(void *);
void *mmap_addr;
- int mmap_size;
- char *ifname;
+ char ifname[MAX_IFNAMELEN];
+ char *nmr_config;
+ int dummy_send;
+ int virt_header; /* send also the virt_header */
+ int extra_bufs; /* goes in nr_arg3 */
};
enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };
@@ -117,9 +202,7 @@ struct targ {
int completed;
int cancel;
int fd;
- struct nmreq nmr;
- struct netmap_if *nifp;
- uint16_t qfirst, qlast; /* range of queues to scan */
+ struct nm_desc *nmd;
volatile uint64_t count;
struct timespec tic, toc;
int me;
@@ -137,51 +220,67 @@ struct targ {
static void
extract_ip_range(struct ip_range *r)
{
- char *p_lo, *p_hi;
- char buf1[16]; // one ip address
-
- D("extract IP range from %s", r->name);
- p_lo = index(r->name, ':'); /* do we have ports ? */
- if (p_lo) {
- D(" found ports at %s", p_lo);
- *p_lo++ = '\0';
- p_hi = index(p_lo, '-');
- if (p_hi)
- *p_hi++ = '\0';
- else
- p_hi = p_lo;
- r->port0 = strtol(p_lo, NULL, 0);
- r->port1 = strtol(p_hi, NULL, 0);
- if (r->port1 < r->port0) {
- r->cur_p = r->port0;
- r->port0 = r->port1;
- r->port1 = r->cur_p;
+ char *ap, *pp;
+ struct in_addr a;
+
+ if (verbose)
+ D("extract IP range from %s", r->name);
+ r->port0 = r->port1 = 0;
+ r->start = r->end = 0;
+
+ /* the first - splits start/end of range */
+ ap = index(r->name, '-'); /* do we have ports ? */
+ if (ap) {
+ *ap++ = '\0';
+ }
+ /* grab the initial values (mandatory) */
+ pp = index(r->name, ':');
+ if (pp) {
+ *pp++ = '\0';
+ r->port0 = r->port1 = strtol(pp, NULL, 0);
+ };
+ inet_aton(r->name, &a);
+ r->start = r->end = ntohl(a.s_addr);
+ if (ap) {
+ pp = index(ap, ':');
+ if (pp) {
+ *pp++ = '\0';
+ if (*pp)
+ r->port1 = strtol(pp, NULL, 0);
+ }
+ if (*ap) {
+ inet_aton(ap, &a);
+ r->end = ntohl(a.s_addr);
}
- r->cur_p = r->port0;
- D("ports are %d to %d", r->port0, r->port1);
}
- p_hi = index(r->name, '-'); /* do we have upper ip ? */
- if (p_hi) {
- *p_hi++ = '\0';
- } else
- p_hi = r->name;
- inet_aton(r->name, &r->start);
- inet_aton(p_hi, &r->end);
- if (r->start.s_addr > r->end.s_addr) {
- r->cur = r->start;
+ if (r->port0 > r->port1) {
+ uint16_t tmp = r->port0;
+ r->port0 = r->port1;
+ r->port1 = tmp;
+ }
+ if (r->start > r->end) {
+ uint32_t tmp = r->start;
r->start = r->end;
- r->end = r->cur;
+ r->end = tmp;
+ }
+ {
+ struct in_addr a;
+ char buf1[16]; // one ip address
+
+ a.s_addr = htonl(r->end);
+ strncpy(buf1, inet_ntoa(a), sizeof(buf1));
+ a.s_addr = htonl(r->start);
+ if (1)
+ D("range is %s:%d to %s:%d",
+ inet_ntoa(a), r->port0, buf1, r->port1);
}
- r->cur = r->start;
- strncpy(buf1, inet_ntoa(r->end), sizeof(buf1));
- D("range is %s %d to %s %d", inet_ntoa(r->start), r->port0,
- buf1, r->port1);
}
static void
extract_mac_range(struct mac_range *r)
{
- D("extract MAC range from %s", r->name);
+ if (verbose)
+ D("extract MAC range from %s", r->name);
bcopy(ether_aton(r->name), &r->start, 6);
bcopy(ether_aton(r->name), &r->end, 6);
#if 0
@@ -196,7 +295,8 @@ extract_mac_range(struct mac_range *r)
if (p)
targ->dst_mac_range = atoi(p+1);
#endif
- D("%s starts at %s", r->name, ether_ntoa(&r->start));
+ if (verbose)
+ D("%s starts at %s", r->name, ether_ntoa(&r->start));
}
static struct targ *targs;
@@ -219,19 +319,17 @@ sigint_h(int sig)
static int
system_ncpus(void)
{
-#ifdef __FreeBSD__
- int mib[2], ncpus;
- size_t len;
-
- mib[0] = CTL_HW;
- mib[1] = HW_NCPU;
- len = sizeof(mib);
+ /* number of cpus; stays 1 if the platform query below fails */
+ int ncpus = 1;
+#if defined (__FreeBSD__)
+ int mib[2] = { CTL_HW, HW_NCPU };
+ /* sysctl() wants the size of the output buffer (ncpus), not of mib */
+ size_t len = sizeof(ncpus);
sysctl(mib, 2, &ncpus, &len, NULL, 0);
-
+#elif defined(linux)
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+#else /* others */
+ ncpus = 1;
+#endif /* others */
+ if (ncpus < 1) { /* sysconf() reports failure as -1 */
+  ncpus = 1;
+ }
return (ncpus);
-#else
- return 1;
-#endif /* !__FreeBSD__ */
}
#ifdef __linux__
@@ -256,6 +354,58 @@ system_ncpus(void)
/*
+ * parse the vale configuration in conf and put it in nmr.
+ * Return the flag set if necessary.
+ * The configuration may consist of 0 to 4 numbers separated
+ * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings.
+ * Missing numbers or zeroes stand for default values.
+ * As an additional convenience, if exactly one number
+ * is specified, then this is assigned to both #tx-slots and #rx-slots.
+ * If there is no 4th number, then the 3rd is assigned to both #tx-rings
+ * and #rx-rings.
+ */
+int
+parse_nmr_config(const char* conf, struct nmreq *nmr)
+{
+	char *w, *tok, *state = NULL;
+	int i, v;
+
+	/* start from all zeroes; this is also the result for an empty conf */
+	nmr->nr_tx_rings = nmr->nr_rx_rings = 0;
+	nmr->nr_tx_slots = nmr->nr_rx_slots = 0;
+	if (conf == NULL || ! *conf)
+		return 0;
+	/* the tokenizer writes into its input, so parse a private copy */
+	w = strdup(conf);
+	if (w == NULL) {
+		D("out of memory, ignoring config '%s'", conf);
+		return 0;
+	}
+	/*
+	 * strtok_r() keeps its position in 'state' instead of hidden static
+	 * storage. Like strtok() it skips empty fields, so "a,,b" is read
+	 * as "a,b".
+	 */
+	for (i = 0, tok = strtok_r(w, ",", &state); tok;
+	    i++, tok = strtok_r(NULL, ",", &state)) {
+		v = atoi(tok);
+		switch (i) {
+		case 0:	/* a single value sets both slot counts... */
+			nmr->nr_tx_slots = nmr->nr_rx_slots = v;
+			break;
+		case 1:	/* ...unless a second one overrides rx */
+			nmr->nr_rx_slots = v;
+			break;
+		case 2:	/* same scheme for the ring counts */
+			nmr->nr_tx_rings = nmr->nr_rx_rings = v;
+			break;
+		case 3:
+			nmr->nr_rx_rings = v;
+			break;
+		default:
+			D("ignored config: %s", tok);
+			break;
+		}
+	}
+	D("txr %d txd %d rxr %d rxd %d",
+			nmr->nr_tx_rings, nmr->nr_tx_slots,
+			nmr->nr_rx_rings, nmr->nr_rx_slots);
+	free(w);
+	/* NM_OPEN_RING_CFG is returned only if at least one field is non-zero */
+	return (nmr->nr_tx_rings || nmr->nr_tx_slots ||
+			nmr->nr_rx_rings || nmr->nr_rx_slots) ?
+		NM_OPEN_RING_CFG : 0;
+}
+
+
+/*
* locate the src mac address for our interface, put it
* into the user-supplied buffer. return 0 if ok, -1 on error.
*/
@@ -296,7 +446,6 @@ source_hwaddr(const char *ifname, char *buf)
static int
setaffinity(pthread_t me, int i)
{
-#ifdef __FreeBSD__
cpuset_t cpumask;
if (i == -1)
@@ -307,13 +456,9 @@ setaffinity(pthread_t me, int i)
CPU_SET(i, &cpumask);
if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) {
- D("Unable to set affinity");
+ D("Unable to set affinity: %s", strerror(errno));
return 1;
}
-#else
- (void)me; /* suppress 'unused' warnings */
- (void)i;
-#endif /* __FreeBSD__ */
return 0;
}
@@ -360,8 +505,10 @@ dump_payload(char *p, int len, struct netmap_ring *ring, int cur)
int i, j, i0;
/* get the length in ASCII of the length of the packet. */
-
- printf("ring %p cur %5d len %5d buf %p\n", ring, cur, len, p);
+
+ printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n",
+ ring, cur, ring->slot[cur].buf_idx,
+ ring->slot[cur].flags, len);
/* hexdump routine */
for (i = 0; i < len; ) {
memset(buf, sizeof(buf), ' ');
@@ -389,6 +536,56 @@ dump_payload(char *p, int len, struct netmap_ring *ring, int cur)
#define uh_sum check
#endif /* linux */
+/*
+ * Advance the addresses in the packet to the next combination,
+ * starting from the least significant field. From most to least
+ * significant the fields are:
+ *	DST_IP DST_PORT SRC_IP SRC_PORT
+ * A field that has reached the end of its range wraps back to the
+ * start of the range and the next, more significant, field advances.
+ * NOTE: the IP and UDP checksums are not recomputed here.
+ */
+static void
+update_addresses(struct pkt *pkt, struct glob_arg *g)
+{
+	struct ip *ip = &pkt->ip;
+	struct udphdr *udp = &pkt->udp;
+	uint32_t a;
+	uint16_t p;
+
+	p = ntohs(udp->uh_sport);
+	if (p < g->src_ip.port1) {	/* just inc, no wrap */
+		udp->uh_sport = htons(p + 1);
+		return;
+	}
+	udp->uh_sport = htons(g->src_ip.port0);
+
+	a = ntohl(ip->ip_src.s_addr);
+	if (a < g->src_ip.end) {	/* just inc, no wrap */
+		ip->ip_src.s_addr = htonl(a + 1);
+		return;
+	}
+	ip->ip_src.s_addr = htonl(g->src_ip.start);
+
+	p = ntohs(udp->uh_dport);
+	if (p < g->dst_ip.port1) {	/* just inc, no wrap */
+		udp->uh_dport = htons(p + 1);
+		return;
+	}
+	udp->uh_dport = htons(g->dst_ip.port0);
+
+	a = ntohl(ip->ip_dst.s_addr);
+	if (a < g->dst_ip.end) {	/* just inc, no wrap */
+		ip->ip_dst.s_addr = htonl(a + 1);
+		return;
+	}
+	/* every field wrapped: back to the first combination */
+	ip->ip_dst.s_addr = htonl(g->dst_ip.start);
+}
+
+/*
+ * initialize one packet and prepare for the next one.
+ * The copy could be done better instead of repeating it each time.
+ */
static void
initialize_packet(struct targ *targ)
{
@@ -398,17 +595,19 @@ initialize_packet(struct targ *targ)
struct udphdr *udp;
uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip);
const char *payload = targ->g->options & OPT_INDIRECT ?
- "XXXXXXXXXXXXXXXXXXXXXX" : default_payload;
- int i, l, l0 = strlen(payload);
-
- for (i = 0; i < paylen;) {
- l = min(l0, paylen - i);
- bcopy(payload, pkt->body + i, l);
- i += l;
+ indirect_payload : default_payload;
+ int i, l0 = strlen(payload);
+
+ /* create a nice NUL-terminated string */
+ for (i = 0; i < paylen; i += l0) {
+ if (l0 > paylen - i)
+ l0 = paylen - i; // last round
+ bcopy(payload, pkt->body + i, l0);
}
pkt->body[i-1] = '\0';
ip = &pkt->ip;
+ /* prepare the headers */
ip->ip_v = IPVERSION;
ip->ip_hl = 5;
ip->ip_id = 0;
@@ -418,22 +617,14 @@ initialize_packet(struct targ *targ)
ip->ip_off = htons(IP_DF); /* Don't fragment */
ip->ip_ttl = IPDEFTTL;
ip->ip_p = IPPROTO_UDP;
- ip->ip_dst.s_addr = targ->g->dst_ip.cur.s_addr;
- if (++targ->g->dst_ip.cur.s_addr > targ->g->dst_ip.end.s_addr)
- targ->g->dst_ip.cur.s_addr = targ->g->dst_ip.start.s_addr;
- ip->ip_src.s_addr = targ->g->src_ip.cur.s_addr;
- if (++targ->g->src_ip.cur.s_addr > targ->g->src_ip.end.s_addr)
- targ->g->src_ip.cur.s_addr = targ->g->src_ip.start.s_addr;
+ ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start);
+ ip->ip_src.s_addr = htonl(targ->g->src_ip.start);
ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0));
udp = &pkt->udp;
- udp->uh_sport = htons(targ->g->src_ip.cur_p);
- if (++targ->g->src_ip.cur_p > targ->g->src_ip.port1)
- targ->g->src_ip.cur_p = targ->g->src_ip.port0;
- udp->uh_dport = htons(targ->g->dst_ip.cur_p);
- if (++targ->g->dst_ip.cur_p > targ->g->dst_ip.port1)
- targ->g->dst_ip.cur_p = targ->g->dst_ip.port0;
+ udp->uh_sport = htons(targ->g->src_ip.port0);
+ udp->uh_dport = htons(targ->g->dst_ip.port0);
udp->uh_ulen = htons(paylen);
/* Magic: taken from sbin/dhclient/packet.c */
udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp),
@@ -449,6 +640,8 @@ initialize_packet(struct targ *targ)
bcopy(&targ->g->src_mac.start, eh->ether_shost, 6);
bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6);
eh->ether_type = htons(ETHERTYPE_IP);
+
+ bzero(&pkt->vh, sizeof(pkt->vh));
// dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0);
}
@@ -460,49 +653,65 @@ initialize_packet(struct targ *targ)
* an interrupt when done.
*/
static int
-send_packets(struct netmap_ring *ring, struct pkt *pkt,
- int size, u_int count, int options)
+send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame,
+ int size, struct glob_arg *g, u_int count, int options,
+ u_int nfrags)
{
- u_int sent, cur = ring->cur;
-
- if (ring->avail < count)
- count = ring->avail;
-
+ u_int n, sent, cur = ring->cur;
+ u_int fcnt;
+
+ n = nm_ring_space(ring);
+ if (n < count)
+ count = n;
+ if (count < nfrags) {
+ D("truncating packet, no room for frags %d %d",
+ count, nfrags);
+ }
#if 0
if (options & (OPT_COPY | OPT_PREFETCH) ) {
for (sent = 0; sent < count; sent++) {
struct netmap_slot *slot = &ring->slot[cur];
char *p = NETMAP_BUF(ring, slot->buf_idx);
- prefetch(p);
- cur = NETMAP_RING_NEXT(ring, cur);
+ __builtin_prefetch(p);
+ cur = nm_ring_next(ring, cur);
}
cur = ring->cur;
}
#endif
- for (sent = 0; sent < count; sent++) {
+ for (fcnt = nfrags, sent = 0; sent < count; sent++) {
struct netmap_slot *slot = &ring->slot[cur];
char *p = NETMAP_BUF(ring, slot->buf_idx);
slot->flags = 0;
- if (options & OPT_DUMP)
- dump_payload(p, size, ring, cur);
if (options & OPT_INDIRECT) {
slot->flags |= NS_INDIRECT;
- *((struct pkt **)(void *)p) = pkt;
- } else if (options & OPT_COPY)
- pkt_copy(pkt, p, size);
- else if (options & OPT_MEMCPY)
- memcpy(p, pkt, size);
- else if (options & OPT_PREFETCH)
- prefetch(p);
+ /* go through uintptr_t so the cast is also clean where pointers are not 64 bit */
+ slot->ptr = (uint64_t)((uintptr_t)frame);
+ } else if (options & OPT_COPY) {
+ nm_pkt_copy(frame, p, size);
+ if (fcnt == nfrags)
+ update_addresses(pkt, g);
+ } else if (options & OPT_MEMCPY) {
+ memcpy(p, frame, size);
+ if (fcnt == nfrags)
+ update_addresses(pkt, g);
+ } else if (options & OPT_PREFETCH) {
+ __builtin_prefetch(p);
+ }
+ if (options & OPT_DUMP)
+ dump_payload(p, size, ring, cur);
slot->len = size;
- if (sent == count - 1)
+ if (--fcnt > 0)
+ slot->flags |= NS_MOREFRAG;
+ else
+ fcnt = nfrags;
+ if (sent == count - 1) {
+ slot->flags &= ~NS_MOREFRAG;
slot->flags |= NS_REPORT;
- cur = NETMAP_RING_NEXT(ring, cur);
+ }
+ cur = nm_ring_next(ring, cur);
}
- ring->avail -= sent;
- ring->cur = cur;
+ ring->head = ring->cur = cur;
return (sent);
}
@@ -518,52 +727,58 @@ static void *
pinger_body(void *data)
{
struct targ *targ = (struct targ *) data;
- struct pollfd fds[1];
- struct netmap_if *nifp = targ->nifp;
+ struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
+ struct netmap_if *nifp = targ->nmd->nifp;
int i, rx = 0, n = targ->g->npackets;
-
- fds[0].fd = targ->fd;
- fds[0].events = (POLLIN);
- static uint32_t sent;
+ void *frame;
+ int size;
+ uint32_t sent = 0;
struct timespec ts, now, last_print;
uint32_t count = 0, min = 1000000000, av = 0;
+ frame = &targ->pkt;
+ frame += sizeof(targ->pkt.vh) - targ->g->virt_header;
+ size = targ->g->pkt_size + targ->g->virt_header;
+
+
if (targ->g->nthreads > 1) {
D("can only ping with 1 thread");
return NULL;
}
clock_gettime(CLOCK_REALTIME_PRECISE, &last_print);
+ now = last_print;
while (n == 0 || (int)sent < n) {
struct netmap_ring *ring = NETMAP_TXRING(nifp, 0);
struct netmap_slot *slot;
char *p;
- for (i = 0; i < 1; i++) {
+ for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */
slot = &ring->slot[ring->cur];
- slot->len = targ->g->pkt_size;
+ slot->len = size;
p = NETMAP_BUF(ring, slot->buf_idx);
- if (ring->avail == 0) {
+ if (nm_ring_empty(ring)) {
D("-- ouch, cannot send");
} else {
- pkt_copy(&targ->pkt, p, targ->g->pkt_size);
+ nm_pkt_copy(frame, p, size);
clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
bcopy(&sent, p+42, sizeof(sent));
bcopy(&ts, p+46, sizeof(ts));
sent++;
- ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
- ring->avail--;
+ ring->head = ring->cur = nm_ring_next(ring, ring->cur);
}
}
/* should use a parameter to decide how often to send */
- if (poll(fds, 1, 3000) <= 0) {
- D("poll error/timeout on queue %d", targ->me);
+ if (poll(&pfd, 1, 3000) <= 0) {
+ D("poll error/timeout on queue %d: %s", targ->me,
+ strerror(errno));
continue;
}
/* see what we got back */
- for (i = targ->qfirst; i < targ->qlast; i++) {
+ /* ring is an RX ring below, so iterate over the rx ring range */
+ for (i = targ->nmd->first_rx_ring;
+ i <= targ->nmd->last_rx_ring; i++) {
ring = NETMAP_RXRING(nifp, i);
- while (ring->avail > 0) {
+ while (!nm_ring_empty(ring)) {
uint32_t seq;
slot = &ring->slot[ring->cur];
p = NETMAP_BUF(ring, slot->buf_idx);
@@ -583,8 +798,7 @@ pinger_body(void *data)
min = ts.tv_nsec;
count ++;
av += ts.tv_nsec;
- ring->avail--;
- ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
+ ring->head = ring->cur = nm_ring_next(ring, ring->cur);
rx++;
}
}
@@ -616,12 +830,10 @@ static void *
ponger_body(void *data)
{
struct targ *targ = (struct targ *) data;
- struct pollfd fds[1];
- struct netmap_if *nifp = targ->nifp;
+ struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
+ struct netmap_if *nifp = targ->nmd->nifp;
struct netmap_ring *txring, *rxring;
int i, rx = 0, sent = 0, n = targ->g->npackets;
- fds[0].fd = targ->fd;
- fds[0].events = (POLLIN);
if (targ->g->nthreads > 1) {
D("can only reply ping with 1 thread");
@@ -632,28 +844,28 @@ ponger_body(void *data)
uint32_t txcur, txavail;
//#define BUSYWAIT
#ifdef BUSYWAIT
- ioctl(fds[0].fd, NIOCRXSYNC, NULL);
+ ioctl(pfd.fd, NIOCRXSYNC, NULL);
#else
- if (poll(fds, 1, 1000) <= 0) {
- D("poll error/timeout on queue %d", targ->me);
+ if (poll(&pfd, 1, 1000) <= 0) {
+ D("poll error/timeout on queue %d: %s", targ->me,
+ strerror(errno));
continue;
}
#endif
txring = NETMAP_TXRING(nifp, 0);
txcur = txring->cur;
- txavail = txring->avail;
+ txavail = nm_ring_space(txring);
/* see what we got back */
- for (i = targ->qfirst; i < targ->qlast; i++) {
+ for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) {
rxring = NETMAP_RXRING(nifp, i);
- while (rxring->avail > 0) {
+ while (!nm_ring_empty(rxring)) {
uint16_t *spkt, *dpkt;
uint32_t cur = rxring->cur;
struct netmap_slot *slot = &rxring->slot[cur];
char *src, *dst;
src = NETMAP_BUF(rxring, slot->buf_idx);
//D("got pkt %p of size %d", src, slot->len);
- rxring->avail--;
- rxring->cur = NETMAP_RING_NEXT(rxring, cur);
+ rxring->head = rxring->cur = nm_ring_next(rxring, cur);
rx++;
if (txavail == 0)
continue;
@@ -662,7 +874,7 @@ ponger_body(void *data)
/* copy... */
dpkt = (uint16_t *)dst;
spkt = (uint16_t *)src;
- pkt_copy(src, dst, slot->len);
+ nm_pkt_copy(src, dst, slot->len);
dpkt[0] = spkt[3];
dpkt[1] = spkt[4];
dpkt[2] = spkt[5];
@@ -671,16 +883,15 @@ ponger_body(void *data)
dpkt[5] = spkt[2];
txring->slot[txcur].len = slot->len;
/* XXX swap src dst mac */
- txcur = NETMAP_RING_NEXT(txring, txcur);
+ txcur = nm_ring_next(txring, txcur);
txavail--;
sent++;
}
}
- txring->cur = txcur;
- txring->avail = txavail;
+ txring->head = txring->cur = txcur;
targ->count = sent;
#ifdef BUSYWAIT
- ioctl(fds[0].fd, NIOCTXSYNC, NULL);
+ ioctl(pfd.fd, NIOCTXSYNC, NULL);
#endif
//D("tx %d rx %d", sent, rx);
}
@@ -721,124 +932,129 @@ timespec2val(const struct timespec *a)
}
-static int
-wait_time(struct timespec ts, struct timespec *wakeup_ts, long long *waited)
+static __inline struct timespec
+timespec_add(struct timespec a, struct timespec b)
{
- struct timespec curtime;
-
- curtime.tv_sec = 0;
- curtime.tv_nsec = 0;
-
- if (clock_gettime(CLOCK_REALTIME_PRECISE, &curtime) == -1) {
- D("clock_gettime: %s", strerror(errno));
- return (-1);
- }
- while (timespec_ge(&ts, &curtime)) {
- if (waited != NULL)
- (*waited)++;
- if (clock_gettime(CLOCK_REALTIME_PRECISE, &curtime) == -1) {
- D("clock_gettime");
- return (-1);
- }
+ struct timespec ret = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec };
+ if (ret.tv_nsec >= 1000000000) {
+ ret.tv_sec++;
+ ret.tv_nsec -= 1000000000;
}
- if (wakeup_ts != NULL)
- *wakeup_ts = curtime;
- return (0);
+ return ret;
}
-static __inline void
-timespec_add(struct timespec *tsa, struct timespec *tsb)
+static __inline struct timespec
+timespec_sub(struct timespec a, struct timespec b)
{
- tsa->tv_sec += tsb->tv_sec;
- tsa->tv_nsec += tsb->tv_nsec;
- if (tsa->tv_nsec >= 1000000000) {
- tsa->tv_sec++;
- tsa->tv_nsec -= 1000000000;
+ struct timespec ret = { a.tv_sec - b.tv_sec, a.tv_nsec - b.tv_nsec };
+ if (ret.tv_nsec < 0) {
+ ret.tv_sec--;
+ ret.tv_nsec += 1000000000;
}
+ return ret;
}
+/*
+ * Block until the absolute time ts has passed and return the clock
+ * reading that ended the wait. While more than 1ms is left the thread
+ * naps in poll() for 1ms at a time; after that it spins on the clock.
+ */
+static struct timespec
+wait_time(struct timespec ts)
+{
+	struct timespec now, left;
+
+	while (1) {
+		clock_gettime(CLOCK_REALTIME_PRECISE, &now);
+		left = timespec_sub(ts, now);
+		if (left.tv_sec < 0)	/* ts is behind us */
+			break;
+		if (left.tv_sec > 0 || left.tv_nsec > 1000000)
+			poll(NULL, 0, 1);
+	}
+	return now;
+}
+
static void *
sender_body(void *data)
{
struct targ *targ = (struct targ *) data;
-
- struct pollfd fds[1];
- struct netmap_if *nifp = targ->nifp;
+ struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT };
+ struct netmap_if *nifp = targ->nmd->nifp;
struct netmap_ring *txring;
- int i, n = targ->g->npackets / targ->g->nthreads, sent = 0;
+ int i, n = targ->g->npackets / targ->g->nthreads;
+ int64_t sent = 0;
int options = targ->g->options | OPT_COPY;
- struct timespec tmptime, nexttime = { 0, 0}; // XXX silence compiler
+ struct timespec nexttime = { 0, 0}; // XXX silence compiler
int rate_limit = targ->g->tx_rate;
- long long waited = 0;
+ struct pkt *pkt = &targ->pkt;
+ void *frame;
+ int size;
+
+ frame = pkt;
+ frame += sizeof(pkt->vh) - targ->g->virt_header;
+ size = targ->g->pkt_size + targ->g->virt_header;
D("start");
if (setaffinity(targ->thread, targ->affinity))
goto quit;
- /* setup poll(2) mechanism. */
- memset(fds, 0, sizeof(fds));
- fds[0].fd = targ->fd;
- fds[0].events = (POLLOUT);
/* main loop.*/
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
if (rate_limit) {
- tmptime.tv_sec = 2;
- tmptime.tv_nsec = 0;
- timespec_add(&targ->tic, &tmptime);
+ targ->tic = timespec_add(targ->tic, (struct timespec){2,0});
targ->tic.tv_nsec = 0;
- if (wait_time(targ->tic, NULL, NULL) == -1) {
- D("wait_time: %s", strerror(errno));
- goto quit;
- }
+ wait_time(targ->tic);
nexttime = targ->tic;
}
- if (targ->g->dev_type == DEV_PCAP) {
- int size = targ->g->pkt_size;
- void *pkt = &targ->pkt;
- pcap_t *p = targ->g->p;
+ if (targ->g->dev_type == DEV_TAP) {
+ D("writing to file desc %d", targ->g->main_fd);
for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
- if (pcap_inject(p, pkt, size) != -1)
+ if (write(targ->g->main_fd, frame, size) != -1)
sent++;
+ update_addresses(pkt, targ->g);
if (i > 10000) {
targ->count = sent;
i = 0;
}
}
- } else if (targ->g->dev_type == DEV_TAP) { /* tap */
- int size = targ->g->pkt_size;
- void *pkt = &targ->pkt;
- D("writing to file desc %d", targ->g->main_fd);
+#ifndef NO_PCAP
+ } else if (targ->g->dev_type == DEV_PCAP) {
+ pcap_t *p = targ->g->p;
for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
- if (write(targ->g->main_fd, pkt, size) != -1)
+ if (pcap_inject(p, frame, size) != -1)
sent++;
+ update_addresses(pkt, targ->g);
if (i > 10000) {
targ->count = sent;
i = 0;
}
}
+#endif /* NO_PCAP */
} else {
int tosend = 0;
+ int frags = targ->g->frags;
+
while (!targ->cancel && (n == 0 || sent < n)) {
if (rate_limit && tosend <= 0) {
tosend = targ->g->burst;
- timespec_add(&nexttime, &targ->g->tx_period);
- if (wait_time(nexttime, &tmptime, &waited) == -1) {
- D("wait_time");
- goto quit;
- }
+ nexttime = timespec_add(nexttime, targ->g->tx_period);
+ wait_time(nexttime);
}
/*
* wait for available room in the send queue(s)
*/
- if (poll(fds, 1, 2000) <= 0) {
+ if (poll(&pfd, 1, 2000) <= 0) {
if (targ->cancel)
break;
- D("poll error/timeout on queue %d", targ->me);
+ D("poll error/timeout on queue %d: %s", targ->me,
+ strerror(errno));
+ // goto quit;
+ }
+ if (pfd.revents & POLLERR) {
+ D("poll error");
goto quit;
}
/*
@@ -848,32 +1064,41 @@ sender_body(void *data)
D("drop copy");
options &= ~OPT_COPY;
}
- for (i = targ->qfirst; i < targ->qlast; i++) {
+ for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
int m, limit = rate_limit ? tosend : targ->g->burst;
if (n > 0 && n - sent < limit)
limit = n - sent;
txring = NETMAP_TXRING(nifp, i);
- if (txring->avail == 0)
+ if (nm_ring_empty(txring))
continue;
- m = send_packets(txring, &targ->pkt, targ->g->pkt_size,
- limit, options);
+ if (frags > 1)
+ limit = ((limit + frags - 1) / frags) * frags;
+
+ m = send_packets(txring, pkt, frame, size, targ->g,
+ limit, options, frags);
+ ND("limit %d tail %d frags %d m %d",
+ limit, txring->tail, frags, m);
sent += m;
- tosend -= m;
targ->count = sent;
+ if (rate_limit) {
+ tosend -= m;
+ if (tosend <= 0)
+ break;
+ }
}
}
/* flush any remaining packets */
- ioctl(fds[0].fd, NIOCTXSYNC, NULL);
+ ioctl(pfd.fd, NIOCTXSYNC, NULL);
/* final part: wait all the TX queues to be empty. */
- for (i = targ->qfirst; i < targ->qlast; i++) {
+ for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
txring = NETMAP_TXRING(nifp, i);
- while (!NETMAP_TX_RING_EMPTY(txring)) {
- ioctl(fds[0].fd, NIOCTXSYNC, NULL);
+ while (nm_tx_pending(txring)) {
+ ioctl(pfd.fd, NIOCTXSYNC, NULL);
usleep(1); /* wait 1 tick */
}
}
- }
+ } /* end DEV_NETMAP */
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
targ->completed = 1;
@@ -887,6 +1112,7 @@ quit:
}
+#ifndef NO_PCAP
static void
receive_pcap(u_char *user, const struct pcap_pkthdr * h,
const u_char * bytes)
@@ -896,27 +1122,27 @@ receive_pcap(u_char *user, const struct pcap_pkthdr * h,
(void)bytes; /* UNUSED */
(*count)++;
}
+#endif /* !NO_PCAP */
static int
receive_packets(struct netmap_ring *ring, u_int limit, int dump)
{
- u_int cur, rx;
+ u_int cur, rx, n;
cur = ring->cur;
- if (ring->avail < limit)
- limit = ring->avail;
+ n = nm_ring_space(ring);
+ if (n < limit)
+ limit = n;
for (rx = 0; rx < limit; rx++) {
struct netmap_slot *slot = &ring->slot[cur];
char *p = NETMAP_BUF(ring, slot->buf_idx);
- slot->flags = OPT_INDIRECT; // XXX
if (dump)
dump_payload(p, slot->len, ring, cur);
- cur = NETMAP_RING_NEXT(ring, cur);
+ cur = nm_ring_next(ring, cur);
}
- ring->avail -= rx;
- ring->cur = cur;
+ ring->head = ring->cur = cur;
return (rx);
}
@@ -925,8 +1151,8 @@ static void *
receiver_body(void *data)
{
struct targ *targ = (struct targ *) data;
- struct pollfd fds[1];
- struct netmap_if *nifp = targ->nifp;
+ struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
+ struct netmap_if *nifp = targ->nmd->nifp;
struct netmap_ring *rxring;
int i;
uint64_t received = 0;
@@ -934,27 +1160,18 @@ receiver_body(void *data)
if (setaffinity(targ->thread, targ->affinity))
goto quit;
- /* setup poll(2) mechanism. */
- memset(fds, 0, sizeof(fds));
- fds[0].fd = targ->fd;
- fds[0].events = (POLLIN);
-
/* unbounded wait for the first packet. */
for (;;) {
- i = poll(fds, 1, 1000);
- if (i > 0 && !(fds[0].revents & POLLERR))
+ i = poll(&pfd, 1, 1000);
+ if (i > 0 && !(pfd.revents & POLLERR))
break;
- D("waiting for initial packets, poll returns %d %d", i, fds[0].revents);
+ RD(1, "waiting for initial packets, poll returns %d %d",
+ i, pfd.revents);
}
/* main loop, exit after 1s silence */
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
- if (targ->g->dev_type == DEV_PCAP) {
- while (!targ->cancel) {
- /* XXX should we poll ? */
- pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL);
- }
- } else if (targ->g->dev_type == DEV_TAP) {
+ if (targ->g->dev_type == DEV_TAP) {
D("reading from %s fd %d", targ->g->ifname, targ->g->main_fd);
while (!targ->cancel) {
char buf[2048];
@@ -962,34 +1179,46 @@ receiver_body(void *data)
if (read(targ->g->main_fd, buf, sizeof(buf)) > 0)
targ->count++;
}
+#ifndef NO_PCAP
+ } else if (targ->g->dev_type == DEV_PCAP) {
+ while (!targ->cancel) {
+ /* XXX should we poll ? */
+ pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL);
+ }
+#endif /* !NO_PCAP */
} else {
int dump = targ->g->options & OPT_DUMP;
while (!targ->cancel) {
/* Once we started to receive packets, wait at most 1 seconds
before quitting. */
- if (poll(fds, 1, 1 * 1000) <= 0 && !targ->g->forever) {
+ if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
targ->toc.tv_sec -= 1; /* Subtract timeout time. */
- break;
+ goto out;
}
- for (i = targ->qfirst; i < targ->qlast; i++) {
+ if (pfd.revents & POLLERR) {
+ D("poll err");
+ goto quit;
+ }
+
+ for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) {
int m;
rxring = NETMAP_RXRING(nifp, i);
- if (rxring->avail == 0)
+ if (nm_ring_empty(rxring))
continue;
m = receive_packets(rxring, targ->g->burst, dump);
received += m;
}
targ->count = received;
-
- // tell the card we have read the data
- //ioctl(fds[0].fd, NIOCRXSYNC, NULL);
}
}
+ clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
+
+out:
targ->completed = 1;
targ->count = received;
@@ -1006,10 +1235,10 @@ quit:
static const char *
norm(char *buf, double val)
{
- char *units[] = { "", "K", "M", "G" };
+ char *units[] = { "", "K", "M", "G", "T" };
u_int i;
- for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *); i++)
+ for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++)
val /= 1000;
sprintf(buf, "%.2f %s", val, units[i]);
return buf;
@@ -1021,8 +1250,8 @@ tx_output(uint64_t sent, int size, double delta)
double bw, raw_bw, pps;
char b1[40], b2[80], b3[80];
- printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n",
- sent, size, delta);
+ printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n",
+ (unsigned long long)sent, size, delta);
if (delta == 0)
delta = 1e-6;
if (size < 60) /* correct for min packet size */
@@ -1043,7 +1272,8 @@ rx_output(uint64_t received, double delta)
double pps;
char b1[40];
- printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta);
+ printf("Received %llu packets, in %.2f seconds.\n",
+ (unsigned long long) received, delta);
if (delta == 0)
delta = 1e-6;
@@ -1063,18 +1293,21 @@ usage(void)
"\t-n count number of iterations (can be 0)\n"
"\t-t pkts_to_send also forces tx mode\n"
"\t-r pkts_to_receive also forces rx mode\n"
- "\t-l pkts_size in bytes excluding CRC\n"
- "\t-d dst-ip end with %%n to sweep n addresses\n"
- "\t-s src-ip end with %%n to sweep n addresses\n"
- "\t-D dst-mac end with %%n to sweep n addresses\n"
- "\t-S src-mac end with %%n to sweep n addresses\n"
+ "\t-l pkt_size in bytes excluding CRC\n"
+ "\t-d dst_ip[:port[-dst_ip:port]] single or range\n"
+ "\t-s src_ip[:port[-src_ip:port]] single or range\n"
+ "\t-D dst-mac\n"
+ "\t-S src-mac\n"
"\t-a cpu_id use setaffinity\n"
"\t-b burst size testing, mostly\n"
"\t-c cores cores to use\n"
"\t-p threads processes/threads to use\n"
"\t-T report_ms milliseconds between reports\n"
- "\t-P use libpcap instead of netmap\n"
+ "\t-P use libpcap instead of netmap\n"
"\t-w wait_for_link_time in seconds\n"
+ "\t-R rate in packets per second\n"
+ "\t-X dump payload\n"
+ "\t-H len add empty virtio-net-header with size 'len'\n"
"",
cmd);
@@ -1092,65 +1325,57 @@ start_threads(struct glob_arg *g)
* using a single descriptor.
*/
for (i = 0; i < g->nthreads; i++) {
- bzero(&targs[i], sizeof(targs[i]));
- targs[i].fd = -1; /* default, with pcap */
- targs[i].g = g;
+ struct targ *t = &targs[i];
+
+ bzero(t, sizeof(*t));
+ t->fd = -1; /* default, with pcap */
+ t->g = g;
if (g->dev_type == DEV_NETMAP) {
- struct nmreq tifreq;
- int tfd;
+ struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */
- /* register interface. */
- tfd = open("/dev/netmap", O_RDWR);
- if (tfd == -1) {
- D("Unable to open /dev/netmap");
- continue;
+ if (g->nthreads > 1) {
+ if (nmd.req.nr_flags != NR_REG_ALL_NIC) {
+ D("invalid nthreads mode %d", nmd.req.nr_flags);
+ continue;
+ }
+ nmd.req.nr_flags = NR_REG_ONE_NIC;
+ nmd.req.nr_ringid = i;
}
- targs[i].fd = tfd;
-
- bzero(&tifreq, sizeof(tifreq));
- strncpy(tifreq.nr_name, g->ifname, sizeof(tifreq.nr_name));
- tifreq.nr_version = NETMAP_API;
- tifreq.nr_ringid = (g->nthreads > 1) ? (i | NETMAP_HW_RING) : 0;
+ /* Only touch one of the rings (rx is already ok) */
+ if (g->td_body == receiver_body)
+ nmd.req.nr_ringid |= NETMAP_NO_TX_POLL;
- /*
- * if we are acting as a receiver only, do not touch the transmit ring.
- * This is not the default because many apps may use the interface
- * in both directions, but a pure receiver does not.
- */
- if (g->td_body == receiver_body) {
- tifreq.nr_ringid |= NETMAP_NO_TX_POLL;
- }
+ /* register interface. Override ifname and ringid etc. */
- if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) {
- D("Unable to register %s", g->ifname);
+ t->nmd = nm_open(t->g->ifname, NULL, g->nmd_flags |
+ NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, g->nmd);
+ if (t->nmd == NULL) {
+ D("Unable to open %s: %s",
+ t->g->ifname, strerror(errno));
continue;
}
- targs[i].nmr = tifreq;
- targs[i].nifp = NETMAP_IF(g->mmap_addr, tifreq.nr_offset);
- /* start threads. */
- targs[i].qfirst = (g->nthreads > 1) ? i : 0;
- targs[i].qlast = (g->nthreads > 1) ? i+1 :
- (g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings);
+ t->fd = t->nmd->fd;
+
} else {
targs[i].fd = g->main_fd;
}
- targs[i].used = 1;
- targs[i].me = i;
+ t->used = 1;
+ t->me = i;
if (g->affinity >= 0) {
if (g->affinity < g->cpus)
- targs[i].affinity = g->affinity;
+ t->affinity = g->affinity;
else
- targs[i].affinity = i % g->cpus;
- } else
- targs[i].affinity = -1;
+ t->affinity = i % g->cpus;
+ } else {
+ t->affinity = -1;
+ }
/* default, init packets */
- initialize_packet(&targs[i]);
+ initialize_packet(t);
- if (pthread_create(&targs[i].thread, NULL, g->td_body,
- &targs[i]) == -1) {
- D("Unable to create thread %d", i);
- targs[i].used = 0;
+ if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) {
+ D("Unable to create thread %d: %s", i, strerror(errno));
+ t->used = 0;
}
}
}
@@ -1175,7 +1400,6 @@ main_thread(struct glob_arg *g)
delta.tv_usec = (g->report_interval%1000)*1000;
select(0, NULL, NULL, NULL, &delta);
gettimeofday(&now, NULL);
- time_second = now.tv_sec;
timersub(&now, &toc, &toc);
my_count = 0;
for (i = 0; i < g->nthreads; i++) {
@@ -1188,8 +1412,10 @@ main_thread(struct glob_arg *g)
continue;
npkts = my_count - prev;
pps = (npkts*1000000 + usec/2) / usec;
- D("%" PRIu64 " pps (%" PRIu64 " pkts in %" PRIu64 " usec)",
- pps, npkts, usec);
+ D("%llu pps (%llu pkts in %llu usec)",
+ (unsigned long long)pps,
+ (unsigned long long)npkts,
+ (unsigned long long)usec);
prev = my_count;
toc = now;
if (done == g->nthreads)
@@ -1233,7 +1459,7 @@ main_thread(struct glob_arg *g)
rx_output(count, delta_t);
if (g->dev_type == DEV_NETMAP) {
- munmap(g->mmap_addr, g->mmap_size);
+ munmap(g->nmd->mem, g->nmd->req.nr_memsize);
close(g->main_fd);
}
}
@@ -1296,7 +1522,7 @@ tap_alloc(char *dev)
/* try to create the device */
if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) {
- D("failed to to a TUNSETIFF");
+ D("failed to to a TUNSETIFF: %s", strerror(errno));
close(fd);
return err;
}
@@ -1321,7 +1547,6 @@ main(int arc, char **argv)
struct glob_arg g;
- struct nmreq nmr;
int ch;
int wait_link = 2;
int devqueues = 1; /* how many device queues */
@@ -1343,9 +1568,12 @@ main(int arc, char **argv)
g.cpus = 1;
g.forever = 1;
g.tx_rate = 0;
+ g.frags = 1;
+ g.nmr_config = "";
+ g.virt_header = 0;
while ( (ch = getopt(arc, argv,
- "a:f:n:i:It:r:l:d:s:D:S:b:c:o:p:PT:w:WvR:X")) != -1) {
+ "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:")) != -1) {
struct sf *fn;
switch(ch) {
@@ -1358,6 +1586,15 @@ main(int arc, char **argv)
g.npackets = atoi(optarg);
break;
+ case 'F':
+ i = atoi(optarg);
+ if (i < 1 || i > 63) {
+ D("invalid frags %d [1..63], ignore", i);
+ break;
+ }
+ g.frags = i;
+ break;
+
case 'f':
for (fn = func; fn->key; fn++) {
if (!strcmp(fn->key, optarg))
@@ -1378,29 +1615,39 @@ main(int arc, char **argv)
break;
case 'i': /* interface */
- g.ifname = optarg;
- if (!strncmp(optarg, "tap", 3))
+ /* A prefix of "tap:", "netmap:" or "pcap:" forces the mode;
+ * otherwise we guess it from the interface name.
+ */
+ D("interface is %s", optarg);
+ if (strlen(optarg) > MAX_IFNAMELEN - 8) {
+ D("ifname too long %s", optarg);
+ break;
+ }
+ strcpy(g.ifname, optarg);
+ if (!strcmp(optarg, "null")) {
+ g.dev_type = DEV_NETMAP;
+ g.dummy_send = 1;
+ } else if (!strncmp(optarg, "tap:", 4)) {
g.dev_type = DEV_TAP;
- else
+ strcpy(g.ifname, optarg + 4);
+ } else if (!strncmp(optarg, "pcap:", 5)) {
+ g.dev_type = DEV_PCAP;
+ strcpy(g.ifname, optarg + 5);
+ } else if (!strncmp(optarg, "netmap:", 7) ||
+ !strncmp(optarg, "vale", 4)) {
g.dev_type = DEV_NETMAP;
+ } else if (!strncmp(optarg, "tap", 3)) {
+ g.dev_type = DEV_TAP;
+ } else { /* prepend netmap: */
+ g.dev_type = DEV_NETMAP;
+ sprintf(g.ifname, "netmap:%s", optarg);
+ }
break;
case 'I':
g.options |= OPT_INDIRECT; /* XXX use indirect buffer */
break;
- case 't': /* send, deprecated */
- D("-t deprecated, please use -f tx -n %s", optarg);
- g.td_body = sender_body;
- g.npackets = atoi(optarg);
- break;
-
- case 'r': /* receive */
- D("-r deprecated, please use -f rx -n %s", optarg);
- g.td_body = receiver_body;
- g.npackets = atoi(optarg);
- break;
-
case 'l': /* pkt_size */
g.pkt_size = atoi(optarg);
break;
@@ -1435,10 +1682,6 @@ main(int arc, char **argv)
g.nthreads = atoi(optarg);
break;
- case 'P':
- g.dev_type = DEV_PCAP;
- break;
-
case 'D': /* destination mac */
g.dst_mac.name = optarg;
break;
@@ -1454,6 +1697,16 @@ main(int arc, char **argv)
break;
case 'X':
g.options |= OPT_DUMP;
+ break;
+ case 'C':
+ g.nmr_config = strdup(optarg);
+ break;
+ case 'H':
+ g.virt_header = atoi(optarg);
+ break;
+ case 'e': /* extra bufs */
+ g.extra_bufs = atoi(optarg);
+ break;
}
}
@@ -1490,6 +1743,18 @@ main(int arc, char **argv)
extract_mac_range(&g.src_mac);
extract_mac_range(&g.dst_mac);
+ if (g.src_ip.start != g.src_ip.end ||
+ g.src_ip.port0 != g.src_ip.port1 ||
+ g.dst_ip.start != g.dst_ip.end ||
+ g.dst_ip.port0 != g.dst_ip.port1)
+ g.options |= OPT_COPY;
+
+ if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1
+ && g.virt_header != VIRT_HDR_2) {
+ D("bad virtio-net-header length");
+ usage();
+ }
+
if (g.dev_type == DEV_TAP) {
D("want to use tap %s", g.ifname);
g.main_fd = tap_alloc(g.ifname);
@@ -1497,7 +1762,8 @@ main(int arc, char **argv)
D("cannot open tap %s", g.ifname);
usage();
}
- } else if (g.dev_type > DEV_NETMAP) {
+#ifndef NO_PCAP
+ } else if (g.dev_type == DEV_PCAP) {
char pcap_errbuf[PCAP_ERRBUF_SIZE];
D("using pcap on %s", g.ifname);
@@ -1507,36 +1773,37 @@ main(int arc, char **argv)
D("cannot open pcap on %s", g.ifname);
usage();
}
+#endif /* !NO_PCAP */
+ } else if (g.dummy_send) { /* but DEV_NETMAP */
+ D("using a dummy send routine");
} else {
- bzero(&nmr, sizeof(nmr));
- nmr.nr_version = NETMAP_API;
+ struct nm_desc base_nmd;
+
+ bzero(&base_nmd, sizeof(base_nmd));
+
+ g.nmd_flags = 0;
+ g.nmd_flags |= parse_nmr_config(g.nmr_config, &base_nmd.req);
+ if (g.extra_bufs) {
+ base_nmd.req.nr_arg3 = g.extra_bufs;
+ g.nmd_flags |= NM_OPEN_ARG3;
+ }
+
/*
- * Open the netmap device to fetch the number of queues of our
- * interface.
+ * Open the netmap device using nm_open().
*
- * The first NIOCREGIF also detaches the card from the
+ * Opening the device also detaches the card from the
* protocol stack and may cause a reset of the card,
* which in turn may take some time for the PHY to
- * reconfigure.
+ * reconfigure. We do the open here to have time to reset.
*/
- g.main_fd = open("/dev/netmap", O_RDWR);
- if (g.main_fd == -1) {
- D("Unable to open /dev/netmap");
- // fail later
- } else {
- if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) {
- D("Unable to get if info without name");
- } else {
- D("map size is %d Kb", nmr.nr_memsize >> 10);
- }
- bzero(&nmr, sizeof(nmr));
- nmr.nr_version = NETMAP_API;
- strncpy(nmr.nr_name, g.ifname, sizeof(nmr.nr_name));
- if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) {
- D("Unable to get if info for %s", g.ifname);
- }
- devqueues = nmr.nr_rx_rings;
+ g.nmd = nm_open(g.ifname, NULL, g.nmd_flags, &base_nmd);
+ if (g.nmd == NULL) {
+ D("Unable to open %s: %s", g.ifname, strerror(errno));
+ goto out;
}
+ g.main_fd = g.nmd->fd;
+ D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem);
+
+ devqueues = g.nmd->req.nr_rx_rings;
/* validate provided nthreads. */
if (g.nthreads < 1 || g.nthreads > devqueues) {
@@ -1544,36 +1811,23 @@ main(int arc, char **argv)
// continue, fail later
}
- /*
- * Map the netmap shared memory: instead of issuing mmap()
- * inside the body of the threads, we prefer to keep this
- * operation here to simplify the thread logic.
- */
- D("mapping %d Kbytes", nmr.nr_memsize>>10);
- g.mmap_size = nmr.nr_memsize;
- g.mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize,
- PROT_WRITE | PROT_READ,
- MAP_SHARED, g.main_fd, 0);
- if (g.mmap_addr == MAP_FAILED) {
- D("Unable to mmap %d KB", nmr.nr_memsize >> 10);
- // continue, fail later
- }
+ if (verbose) {
+ struct netmap_if *nifp = g.nmd->nifp;
+ struct nmreq *req = &g.nmd->req;
- /*
- * Register the interface on the netmap device: from now on,
- * we can operate on the network interface without any
- * interference from the legacy network stack.
- *
- * We decide to put the first interface registration here to
- * give time to cards that take a long time to reset the PHY.
- */
- nmr.nr_version = NETMAP_API;
- if (ioctl(g.main_fd, NIOCREGIF, &nmr) == -1) {
- D("Unable to register interface %s", g.ifname);
- //continue, fail later
+ D("nifp at offset %d, %d tx %d rx region %d",
+ req->nr_offset, req->nr_tx_rings, req->nr_rx_rings,
+ req->nr_arg2);
+ for (i = 0; i <= req->nr_tx_rings; i++) {
+ D(" TX%d at 0x%lx", i,
+ (char *)NETMAP_TXRING(nifp, i) - (char *)nifp);
+ }
+ for (i = 0; i <= req->nr_rx_rings; i++) {
+ D(" RX%d at 0x%lx", i,
+ (char *)NETMAP_RXRING(nifp, i) - (char *)nifp);
+ }
}
-
/* Print some debug information. */
fprintf(stdout,
"%s %s: %d queues, %d threads and %d cpus.\n",
@@ -1587,7 +1841,8 @@ main(int arc, char **argv)
g.src_ip.name, g.dst_ip.name,
g.src_mac.name, g.dst_mac.name);
}
-
+
+out:
/* Exit if something went wrong. */
if (g.main_fd < 0) {
D("aborting");
@@ -1595,6 +1850,7 @@ main(int arc, char **argv)
}
}
+
if (g.options) {
D("--- SPECIAL OPTIONS:%s%s%s%s%s\n",
g.options & OPT_PREFETCH ? " prefetch" : "",
@@ -1603,23 +1859,27 @@ main(int arc, char **argv)
g.options & OPT_INDIRECT ? " indirect" : "",
g.options & OPT_COPY ? " copy" : "");
}
-
- if (g.tx_rate == 0) {
- g.tx_period.tv_sec = 0;
- g.tx_period.tv_nsec = 0;
- } else if (g.tx_rate == 1) {
- g.tx_period.tv_sec = 1;
- g.tx_period.tv_nsec = 0;
- } else {
- g.tx_period.tv_sec = 0;
- g.tx_period.tv_nsec = (1e9 / g.tx_rate) * g.burst;
- if (g.tx_period.tv_nsec > 1000000000) {
- g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000;
- g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000;
- }
+
+ g.tx_period.tv_sec = g.tx_period.tv_nsec = 0;
+ if (g.tx_rate > 0) {
+ /* try to have at least something every second,
+ * capping the burst at 1/300 s (tx_rate/300 packets) worth of data
+ * (but no less than one full set of fragments)
+ */
+ uint64_t x;
+ int lim = (g.tx_rate)/300;
+ if (g.burst > lim)
+ g.burst = lim;
+ if (g.burst < g.frags)
+ g.burst = g.frags;
+ x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate;
+ g.tx_period.tv_nsec = x;
+ g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000;
+ g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000;
}
- D("Sending %d packets every %d.%09d ns",
- g.burst, (int)g.tx_period.tv_sec, (int)g.tx_period.tv_nsec);
+ if (g.td_body == sender_body)
+ D("Sending %d packets every %ld.%09ld s",
+ g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec);
/* Wait for PHY reset. */
D("Wait %d secs for phy reset", wait_link);
sleep(wait_link);
@@ -1629,16 +1889,6 @@ main(int arc, char **argv)
global_nthreads = g.nthreads;
signal(SIGINT, sigint_h);
-#if 0 // XXX this is not needed, i believe
- if (g.dev_type > DEV_NETMAP) {
- g.p = pcap_open_live(g.ifname, 0, 1, 100, NULL);
- if (g.p == NULL) {
- D("cannot open pcap on %s", g.ifname);
- usage();
- } else
- D("using pcap %p on %s", g.p, g.ifname);
- }
-#endif // XXX
start_threads(&g);
main_thread(&g);
return 0;
diff --git a/tools/tools/netmap/vale-ctl.c b/tools/tools/netmap/vale-ctl.c
index 0a478ba..e1d8da5 100644
--- a/tools/tools/netmap/vale-ctl.c
+++ b/tools/tools/netmap/vale-ctl.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 Michio Honda. All rights reserved.
+ * Copyright (C) 2013-2014 Michio Honda. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -33,6 +33,7 @@
#include <unistd.h> /* close */
#include <sys/ioctl.h> /* ioctl */
#include <sys/param.h>
+#include <sys/socket.h> /* apple needs sockaddr */
#include <net/if.h> /* ifreq */
#include <net/netmap.h>
#include <net/netmap_user.h>
@@ -69,20 +70,22 @@ bdg_ctl(const char *name, int nr_cmd, int nr_arg)
nr_arg = 0;
nmr.nr_arg1 = nr_arg;
error = ioctl(fd, NIOCREGIF, &nmr);
- if (error == -1)
- D("Unable to %s %s to the bridge", nr_cmd ==
+ if (error == -1) {
+ ND("Unable to %s %s to the bridge", nr_cmd ==
NETMAP_BDG_DETACH?"detach":"attach", name);
- else
- D("Success to %s %s to the bridge\n", nr_cmd ==
+ perror(name);
+ } else
+ ND("Success to %s %s to the bridge", nr_cmd ==
NETMAP_BDG_DETACH?"detach":"attach", name);
break;
case NETMAP_BDG_LIST:
if (strlen(nmr.nr_name)) { /* name to bridge/port info */
error = ioctl(fd, NIOCGINFO, &nmr);
- if (error)
- D("Unable to obtain info for %s", name);
- else
+ if (error) {
+ ND("Unable to obtain info for %s", name);
+ perror(name);
+ } else
D("%s at bridge:%d port:%d", name, nmr.nr_arg1,
nmr.nr_arg2);
break;
@@ -101,9 +104,10 @@ bdg_ctl(const char *name, int nr_cmd, int nr_arg)
default: /* GINFO */
nmr.nr_cmd = nmr.nr_arg1 = nmr.nr_arg2 = 0;
error = ioctl(fd, NIOCGINFO, &nmr);
- if (error)
- D("Unable to get if info for %s", name);
- else
+ if (error) {
+ ND("Unable to get if info for %s", name);
+ perror(name);
+ } else
D("%s: %d queues.", name, nmr.nr_rx_rings);
break;
}
@@ -118,7 +122,7 @@ main(int argc, char *argv[])
const char *command = basename(argv[0]);
char *name = NULL;
- if (argc != 3 && argc != 1 /* list all */ ) {
+ if (argc > 3) {
usage:
fprintf(stderr,
"Usage:\n"
@@ -127,12 +131,13 @@ usage:
"\t-d interface interface name to be detached\n"
"\t-a interface interface name to be attached\n"
"\t-h interface interface name to be attached with the host stack\n"
- "\t-l list all or specified bridge's interfaces\n"
+ "\t-l list all or specified bridge's interfaces (default)\n"
"", command);
return 0;
}
- while ((ch = getopt(argc, argv, "d:a:h:g:l:")) != -1) {
+ while ((ch = getopt(argc, argv, "d:a:h:g:l")) != -1) {
+ name = optarg; /* default */
switch (ch) {
default:
fprintf(stderr, "bad option %c %s", ch, optarg);
@@ -152,12 +157,16 @@ usage:
break;
case 'l':
nr_cmd = NETMAP_BDG_LIST;
+ if (optind < argc && argv[optind][0] == '-')
+ name = NULL;
break;
}
- name = optarg;
+ if (optind != argc) {
+ // fprintf(stderr, "optind %d argc %d\n", optind, argc);
+ goto usage;
+ }
}
if (argc == 1)
nr_cmd = NETMAP_BDG_LIST;
- bdg_ctl(name, nr_cmd, nr_arg);
- return 0;
+ return bdg_ctl(name, nr_cmd, nr_arg) ? 1 : 0;
}
OpenPOWER on IntegriCloud