summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorluigi <luigi@FreeBSD.org>2011-11-17 12:17:39 +0000
committerluigi <luigi@FreeBSD.org>2011-11-17 12:17:39 +0000
commitb97eb69f8052729fd82bc3123d4ffd6644d64bac (patch)
tree8be7a4f824011375a281269e79b86fa172e84386 /tools
parent9180d89c26dec8706c0bc7af831d028b46528eb4 (diff)
downloadFreeBSD-src-b97eb69f8052729fd82bc3123d4ffd6644d64bac.zip
FreeBSD-src-b97eb69f8052729fd82bc3123d4ffd6644d64bac.tar.gz
Bring in support for netmap, a framework for very efficient packet
I/O from userspace, capable of line rate at 10G, see http://info.iet.unipi.it/~luigi/netmap/ At this time I am bringing in only the generic code (sys/dev/netmap/ plus two headers under sys/net/), and some sample applications in tools/tools/netmap. There is also a manpage in share/man/man4 [1] In order to make use of the framework you need to build a kernel with "device netmap", and patch individual drivers with the code that you can find in sys/dev/netmap/head.diff The file will go away as the relevant pieces are committed to the various device drivers, which should happen in a few days after talking to the driver maintainers. Netmap support is available at the moment for Intel 10G and 1G cards (ixgbe, em/lem/igb), and for the Realtek 1G card ("re"). I have partial patches for "bge" and am starting to work on "cxgbe". Hopefully changes are trivial enough so interested third parties can submit their patches. Interested people can contact me for advice on how to add netmap support to specific devices. CREDITS: Netmap has been developed by Luigi Rizzo and other collaborators at the Universita` di Pisa, and supported by EU project CHANGE (http://www.change-project.eu/) The code is distributed under a BSD Copyright. [1] In my opinion is a bad idea to have all manpage in one directory. We should place kernel documentation in the same dir that contains the code, which would make it much simpler to keep doc and code in sync, reduce the clutter in share/man/ and incidentally is the policy used for all of userspace code. Makefiles and doc tools can be trivially adjusted to find the manpages in the relevant subdirs.
Diffstat (limited to 'tools')
-rw-r--r--tools/tools/README1
-rw-r--r--tools/tools/netmap/Makefile25
-rw-r--r--tools/tools/netmap/README11
-rw-r--r--tools/tools/netmap/bridge.c456
-rw-r--r--tools/tools/netmap/click-test.cfg19
-rw-r--r--tools/tools/netmap/pcap.c761
-rw-r--r--tools/tools/netmap/pkt-gen.c1021
7 files changed, 2294 insertions, 0 deletions
diff --git a/tools/tools/README b/tools/tools/README
index 253b2e0..9c3db2f 100644
--- a/tools/tools/README
+++ b/tools/tools/README
@@ -50,6 +50,7 @@ mfc Merge a directory from HEAD to a branch where it does not
mid Create a Message-ID database for mailing lists.
mwl Tools specific to the Marvell 88W8363 support
ncpus Count the number of processors
+netmap Test applications for netmap(4)
notescheck Check for missing devices and options in NOTES files.
npe Tools specific to the Intel IXP4XXX NPE device
nxge A diagnostic tool for the nxge(4) driver
diff --git a/tools/tools/netmap/Makefile b/tools/tools/netmap/Makefile
new file mode 100644
index 0000000..4b682e5
--- /dev/null
+++ b/tools/tools/netmap/Makefile
@@ -0,0 +1,25 @@
+#
+# $FreeBSD$
+#
+# For multiple programs using a single source file each,
+# we can just define 'progs' and create custom targets.
+PROGS = pkt-gen bridge testpcap libnetmap.so
+
+CLEANFILES = $(PROGS) pcap.o
+NO_MAN=
+CFLAGS += -Werror -Wall -nostdinc -I/usr/include -I../../../sys
+CFLAGS += -Wextra
+
+LDFLAGS += -lpthread -lpcap
+
+.include <bsd.prog.mk>
+.include <bsd.lib.mk>
+
+all: $(PROGS)
+
+testpcap: pcap.c libnetmap.so
+ $(CC) $(CFLAGS) -L. -lnetmap -o ${.TARGET} pcap.c
+
+libnetmap.so: pcap.c
+ $(CC) $(CFLAGS) -fpic -c ${.ALLSRC}
+ $(CC) -shared -o ${.TARGET} ${.ALLSRC:.c=.o}
diff --git a/tools/tools/netmap/README b/tools/tools/netmap/README
new file mode 100644
index 0000000..9a1ba60
--- /dev/null
+++ b/tools/tools/netmap/README
@@ -0,0 +1,11 @@
+$FreeBSD$
+
+This directory contains examples that use netmap
+
+ pkt-gen a packet sink/source using the netmap API
+
+ bridge a two-port jumper wire, also using the native API
+
+ testpcap a jumper wire using libnetmap (or libpcap)
+
+ click* various click examples
diff --git a/tools/tools/netmap/bridge.c b/tools/tools/netmap/bridge.c
new file mode 100644
index 0000000..2385a08
--- /dev/null
+++ b/tools/tools/netmap/bridge.c
@@ -0,0 +1,456 @@
+/*
+ * (C) 2011 Luigi Rizzo, Matteo Landi
+ *
+ * BSD license
+ *
+ * A netmap client to bridge two network interfaces
+ * (or one interface and the host stack).
+ *
+ * $FreeBSD$
+ */
+
+#include <errno.h>
+#include <signal.h> /* signal */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h> /* strcmp */
+#include <fcntl.h> /* open */
+#include <unistd.h> /* close */
+
+#include <sys/endian.h> /* le64toh */
+#include <sys/mman.h> /* PROT_* */
+#include <sys/ioctl.h> /* ioctl */
+#include <machine/param.h>
+#include <sys/poll.h>
+#include <sys/socket.h> /* sockaddr.. */
+#include <arpa/inet.h> /* ntohs */
+
+#include <net/if.h> /* ifreq */
+#include <net/ethernet.h>
+#include <net/netmap.h>
+#include <net/netmap_user.h>
+
+#include <netinet/in.h> /* sockaddr_in */
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+int verbose = 0;
+
+/* debug support */
+#define ND(format, ...) {}
+#define D(format, ...) do { \
+ if (!verbose) break; \
+ struct timeval _xxts; \
+ gettimeofday(&_xxts, NULL); \
+ fprintf(stderr, "%03d.%06d %s [%d] " format "\n", \
+ (int)_xxts.tv_sec %1000, (int)_xxts.tv_usec, \
+ __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+ } while (0)
+
+
+char *version = "$Id: bridge.c 9642 2011-11-07 21:39:47Z luigi $";
+
+static int do_abort = 0;
+
+/*
+ * info on a ring we handle
+ */
+struct my_ring {
+ const char *ifname;
+ int fd;
+ char *mem; /* userspace mmap address */
+ u_int memsize;
+ u_int queueid;
+ u_int begin, end; /* first..last+1 rings to check */
+ struct netmap_if *nifp;
+ struct netmap_ring *tx, *rx; /* shortcuts */
+
+ uint32_t if_flags;
+ uint32_t if_reqcap;
+ uint32_t if_curcap;
+};
+
+static void
+sigint_h(__unused int sig)
+{
+ do_abort = 1;
+ signal(SIGINT, SIG_DFL);
+}
+
+
+static int
+do_ioctl(struct my_ring *me, int what)
+{
+ struct ifreq ifr;
+ int error;
+
+ bzero(&ifr, sizeof(ifr));
+ strncpy(ifr.ifr_name, me->ifname, sizeof(ifr.ifr_name));
+ switch (what) {
+ case SIOCSIFFLAGS:
+ ifr.ifr_flagshigh = me->if_flags >> 16;
+ ifr.ifr_flags = me->if_flags & 0xffff;
+ break;
+ case SIOCSIFCAP:
+ ifr.ifr_reqcap = me->if_reqcap;
+ ifr.ifr_curcap = me->if_curcap;
+ break;
+ }
+ error = ioctl(me->fd, what, &ifr);
+ if (error) {
+ D("ioctl error %d", what);
+ return error;
+ }
+ switch (what) {
+ case SIOCGIFFLAGS:
+ me->if_flags = (ifr.ifr_flagshigh << 16) |
+ (0xffff & ifr.ifr_flags);
+ if (verbose)
+ D("flags are 0x%x", me->if_flags);
+ break;
+
+ case SIOCGIFCAP:
+ me->if_reqcap = ifr.ifr_reqcap;
+ me->if_curcap = ifr.ifr_curcap;
+ if (verbose)
+ D("curcap are 0x%x", me->if_curcap);
+ break;
+ }
+ return 0;
+}
+
+/*
+ * open a device. if me->mem is null then do an mmap.
+ */
+static int
+netmap_open(struct my_ring *me, int ringid)
+{
+ int fd, err, l;
+ struct nmreq req;
+
+ me->fd = fd = open("/dev/netmap", O_RDWR);
+ if (fd < 0) {
+ D("Unable to open /dev/netmap");
+ return (-1);
+ }
+ bzero(&req, sizeof(req));
+ strncpy(req.nr_name, me->ifname, sizeof(req.nr_name));
+ req.nr_ringid = ringid;
+ err = ioctl(fd, NIOCGINFO, &req);
+ if (err) {
+ D("cannot get info on %s", me->ifname);
+ goto error;
+ }
+ me->memsize = l = req.nr_memsize;
+ if (verbose)
+ D("memsize is %d MB", l>>20);
+ err = ioctl(fd, NIOCREGIF, &req);
+ if (err) {
+ D("Unable to register %s", me->ifname);
+ goto error;
+ }
+
+ if (me->mem == NULL) {
+ me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
+ if (me->mem == MAP_FAILED) {
+ D("Unable to mmap");
+ me->mem = NULL;
+ goto error;
+ }
+ }
+
+ me->nifp = NETMAP_IF(me->mem, req.nr_offset);
+ me->queueid = ringid;
+ if (ringid & NETMAP_SW_RING) {
+ me->begin = req.nr_numrings;
+ me->end = me->begin + 1;
+ } else if (ringid & NETMAP_HW_RING) {
+ me->begin = ringid & NETMAP_RING_MASK;
+ me->end = me->begin + 1;
+ } else {
+ me->begin = 0;
+ me->end = req.nr_numrings;
+ }
+ me->tx = NETMAP_TXRING(me->nifp, me->begin);
+ me->rx = NETMAP_RXRING(me->nifp, me->begin);
+ return (0);
+error:
+ close(me->fd);
+ return -1;
+}
+
+
+static int
+netmap_close(struct my_ring *me)
+{
+ D("");
+ if (me->mem)
+ munmap(me->mem, me->memsize);
+ ioctl(me->fd, NIOCUNREGIF, NULL);
+ close(me->fd);
+ return (0);
+}
+
+
+/*
+ * move up to 'limit' pkts from rxring to txring swapping buffers.
+ */
+static int
+process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
+ u_int limit, const char *msg)
+{
+ u_int j, k, m = 0;
+
+ /* print a warning if any of the ring flags is set (e.g. NM_REINIT) */
+ if (rxring->flags || txring->flags)
+ D("%s rxflags %x txflags %x",
+ msg, rxring->flags, txring->flags);
+ j = rxring->cur; /* RX */
+ k = txring->cur; /* TX */
+ if (rxring->avail < limit)
+ limit = rxring->avail;
+ if (txring->avail < limit)
+ limit = txring->avail;
+ m = limit;
+ while (limit-- > 0) {
+ struct netmap_slot *rs = &rxring->slot[j];
+ struct netmap_slot *ts = &txring->slot[k];
+ uint32_t pkt;
+
+ /* swap packets */
+ if (ts->buf_idx < 2 || rs->buf_idx < 2) {
+ D("wrong index rx[%d] = %d -> tx[%d] = %d",
+ j, rs->buf_idx, k, ts->buf_idx);
+ sleep(2);
+ }
+ pkt = ts->buf_idx;
+ ts->buf_idx = rs->buf_idx;
+ rs->buf_idx = pkt;
+
+ /* copy the packet lenght. */
+ if (rs->len < 14 || rs->len > 2048)
+ D("wrong len %d rx[%d] -> tx[%d]", rs->len, j, k);
+ else if (verbose > 1)
+ D("send len %d rx[%d] -> tx[%d]", rs->len, j, k);
+ ts->len = rs->len;
+
+ /* report the buffer change. */
+ ts->flags |= NS_BUF_CHANGED;
+ rs->flags |= NS_BUF_CHANGED;
+ j = NETMAP_RING_NEXT(rxring, j);
+ k = NETMAP_RING_NEXT(txring, k);
+ }
+ rxring->avail -= m;
+ txring->avail -= m;
+ rxring->cur = j;
+ txring->cur = k;
+ if (verbose && m > 0)
+ D("sent %d packets to %p", m, txring);
+
+ return (m);
+}
+
+/* move packts from src to destination */
+static int
+move(struct my_ring *src, struct my_ring *dst, u_int limit)
+{
+ struct netmap_ring *txring, *rxring;
+ u_int m = 0, si = src->begin, di = dst->begin;
+ const char *msg = (src->queueid & NETMAP_SW_RING) ?
+ "host->net" : "net->host";
+
+ while (si < src->end && di < dst->end) {
+ rxring = NETMAP_RXRING(src->nifp, si);
+ txring = NETMAP_TXRING(dst->nifp, di);
+ ND("txring %p rxring %p", txring, rxring);
+ if (rxring->avail == 0) {
+ si++;
+ continue;
+ }
+ if (txring->avail == 0) {
+ di++;
+ continue;
+ }
+ m += process_rings(rxring, txring, limit, msg);
+ }
+
+ return (m);
+}
+
+/*
+ * how many packets on this set of queues ?
+ */
+static int
+howmany(struct my_ring *me, int tx)
+{
+ u_int i, tot = 0;
+
+ ND("me %p begin %d end %d", me, me->begin, me->end);
+ for (i = me->begin; i < me->end; i++) {
+ struct netmap_ring *ring = tx ?
+ NETMAP_TXRING(me->nifp, i) : NETMAP_RXRING(me->nifp, i);
+ tot += ring->avail;
+ }
+ if (0 && verbose && tot && !tx)
+ D("ring %s %s %s has %d avail at %d",
+ me->ifname, tx ? "tx": "rx",
+ me->end > me->nifp->ni_num_queues ?
+ "host":"net",
+ tot, NETMAP_TXRING(me->nifp, me->begin)->cur);
+ return tot;
+}
+
+/*
+ * bridge [-v] if1 [if2]
+ *
+ * If only one name, or the two interfaces are the same,
+ * bridges userland and the adapter. Otherwise bridge
+ * two intefaces.
+ */
+int
+main(int argc, char **argv)
+{
+ struct pollfd pollfd[2];
+ int i;
+ u_int burst = 1024;
+ struct my_ring me[2];
+
+ fprintf(stderr, "%s %s built %s %s\n",
+ argv[0], version, __DATE__, __TIME__);
+
+ bzero(me, sizeof(me));
+
+ while (argc > 1 && !strcmp(argv[1], "-v")) {
+ verbose++;
+ argv++;
+ argc--;
+ }
+
+ if (argc < 2 || argc > 4) {
+ D("Usage: %s IFNAME1 [IFNAME2 [BURST]]", argv[0]);
+ return (1);
+ }
+
+ /* setup netmap interface #1. */
+ me[0].ifname = argv[1];
+ if (argc == 2 || !strcmp(argv[1], argv[2])) {
+ D("same interface, endpoint 0 goes to host");
+ i = NETMAP_SW_RING;
+ me[1].ifname = argv[1];
+ } else {
+ /* two different interfaces. Take all rings on if1 */
+ i = 0; // all hw rings
+ me[1].ifname = argv[2];
+ }
+ if (netmap_open(me, i))
+ return (1);
+ me[1].mem = me[0].mem; /* copy the pointer, so only one mmap */
+ if (netmap_open(me+1, 0))
+ return (1);
+
+ /* if bridging two interfaces, set promisc mode */
+ if (i != NETMAP_SW_RING) {
+ do_ioctl(me, SIOCGIFFLAGS);
+ if ((me[0].if_flags & IFF_UP) == 0) {
+ D("%s is down, bringing up...", me[0].ifname);
+ me[0].if_flags |= IFF_UP;
+ }
+ me[0].if_flags |= IFF_PPROMISC;
+ do_ioctl(me, SIOCSIFFLAGS);
+
+ do_ioctl(me+1, SIOCGIFFLAGS);
+ me[1].if_flags |= IFF_PPROMISC;
+ do_ioctl(me+1, SIOCSIFFLAGS);
+
+ /* also disable checksums etc. */
+ do_ioctl(me, SIOCGIFCAP);
+ me[0].if_reqcap = me[0].if_curcap;
+ me[0].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE);
+ do_ioctl(me+0, SIOCSIFCAP);
+ }
+ do_ioctl(me+1, SIOCGIFFLAGS);
+ if ((me[1].if_flags & IFF_UP) == 0) {
+ D("%s is down, bringing up...", me[1].ifname);
+ me[1].if_flags |= IFF_UP;
+ }
+ do_ioctl(me+1, SIOCSIFFLAGS);
+
+ do_ioctl(me+1, SIOCGIFCAP);
+ me[1].if_reqcap = me[1].if_curcap;
+ me[1].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE);
+ do_ioctl(me+1, SIOCSIFCAP);
+ if (argc > 3)
+ burst = atoi(argv[3]); /* packets burst size. */
+
+ /* setup poll(2) variables. */
+ memset(pollfd, 0, sizeof(pollfd));
+ for (i = 0; i < 2; i++) {
+ pollfd[i].fd = me[i].fd;
+ pollfd[i].events = (POLLIN);
+ }
+
+ D("Wait 2 secs for link to come up...");
+ sleep(2);
+ D("Ready to go, %s 0x%x/%d <-> %s 0x%x/%d.",
+ me[0].ifname, me[0].queueid, me[0].nifp->ni_num_queues,
+ me[1].ifname, me[1].queueid, me[1].nifp->ni_num_queues);
+
+ /* main loop */
+ signal(SIGINT, sigint_h);
+ while (!do_abort) {
+ int n0, n1, ret;
+ pollfd[0].events = pollfd[1].events = 0;
+ pollfd[0].revents = pollfd[1].revents = 0;
+ n0 = howmany(me, 0);
+ n1 = howmany(me + 1, 0);
+ if (n0)
+ pollfd[1].events |= POLLOUT;
+ else
+ pollfd[0].events |= POLLIN;
+ if (n1)
+ pollfd[0].events |= POLLOUT;
+ else
+ pollfd[1].events |= POLLIN;
+ ret = poll(pollfd, 2, 2500);
+ if (ret <= 0 || verbose)
+ D("poll %s [0] ev %x %x rx %d@%d tx %d,"
+ " [1] ev %x %x rx %d@%d tx %d",
+ ret <= 0 ? "timeout" : "ok",
+ pollfd[0].events,
+ pollfd[0].revents,
+ howmany(me, 0),
+ me[0].rx->cur,
+ howmany(me, 1),
+ pollfd[1].events,
+ pollfd[1].revents,
+ howmany(me+1, 0),
+ me[1].rx->cur,
+ howmany(me+1, 1)
+ );
+ if (ret < 0)
+ continue;
+ if (pollfd[0].revents & POLLERR) {
+ D("error on fd0, rxcur %d@%d",
+ me[0].rx->avail, me[0].rx->cur);
+ }
+ if (pollfd[1].revents & POLLERR) {
+ D("error on fd1, rxcur %d@%d",
+ me[1].rx->avail, me[1].rx->cur);
+ }
+ if (pollfd[0].revents & POLLOUT) {
+ move(me + 1, me, burst);
+ // XXX we don't need the ioctl */
+ // ioctl(me[0].fd, NIOCTXSYNC, NULL);
+ }
+ if (pollfd[1].revents & POLLOUT) {
+ move(me, me + 1, burst);
+ // XXX we don't need the ioctl */
+ // ioctl(me[1].fd, NIOCTXSYNC, NULL);
+ }
+ }
+ D("exiting");
+ netmap_close(me + 1);
+ netmap_close(me + 0);
+
+ return (0);
+}
diff --git a/tools/tools/netmap/click-test.cfg b/tools/tools/netmap/click-test.cfg
new file mode 100644
index 0000000..fc5759f
--- /dev/null
+++ b/tools/tools/netmap/click-test.cfg
@@ -0,0 +1,19 @@
+//
+// $FreeBSD$
+//
+// A sample test configuration for click
+//
+//
+// create a switch
+
+myswitch :: EtherSwitch;
+
+// two input devices
+
+c0 :: FromDevice(ix0, PROMISC true);
+c1 :: FromDevice(ix1, PROMISC true);
+
+// and now pass packets around
+
+c0[0] -> [0]sw[0] -> Queue(10000) -> ToDevice(ix0);
+c1[0] -> [1]sw[1] -> Queue(10000) -> ToDevice(ix1);
diff --git a/tools/tools/netmap/pcap.c b/tools/tools/netmap/pcap.c
new file mode 100644
index 0000000..f010b83
--- /dev/null
+++ b/tools/tools/netmap/pcap.c
@@ -0,0 +1,761 @@
+/*
+ * (C) 2011 Luigi Rizzo
+ *
+ * BSD license
+ *
+ * A simple library that maps some pcap functions onto netmap
+ * This is not 100% complete but enough to let tcpdump, trafshow
+ * and other apps work.
+ *
+ * $FreeBSD$
+ */
+
+#include <errno.h>
+#include <signal.h> /* signal */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h> /* strcmp */
+#include <fcntl.h> /* open */
+#include <unistd.h> /* close */
+
+#include <sys/endian.h> /* le64toh */
+#include <sys/mman.h> /* PROT_* */
+#include <sys/ioctl.h> /* ioctl */
+#include <machine/param.h>
+#include <sys/poll.h>
+#include <sys/socket.h> /* sockaddr.. */
+#include <arpa/inet.h> /* ntohs */
+
+#include <net/if.h> /* ifreq */
+#include <net/ethernet.h>
+#include <net/netmap.h>
+#include <net/netmap_user.h>
+
+#include <netinet/in.h> /* sockaddr_in */
+
+#include <sys/socket.h>
+#include <ifaddrs.h>
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+char *version = "$Id$";
+int verbose = 0;
+
+/* debug support */
+#define ND(format, ...) do {} while (0)
+#define D(format, ...) do { \
+ if (verbose) \
+ fprintf(stderr, "--- %s [%d] " format "\n", \
+ __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+ } while (0)
+
+
+/*
+ * We redefine here a number of structures that are in pcap.h
+ * so we can compile this file without the system header.
+ */
+#ifndef PCAP_ERRBUF_SIZE
+#define PCAP_ERRBUF_SIZE 128
+
+/*
+ * Each packet is accompanied by a header including the timestamp,
+ * captured size and actual size.
+ */
+struct pcap_pkthdr {
+ struct timeval ts; /* time stamp */
+ uint32_t caplen; /* length of portion present */
+ uint32_t len; /* length this packet (off wire) */
+};
+
+typedef struct pcap_if pcap_if_t;
+
+/*
+ * Representation of an interface address.
+ */
+struct pcap_addr {
+ struct pcap_addr *next;
+ struct sockaddr *addr; /* address */
+ struct sockaddr *netmask; /* netmask for the above */
+ struct sockaddr *broadaddr; /* broadcast addr for the above */
+ struct sockaddr *dstaddr; /* P2P dest. address for the above */
+};
+
+struct pcap_if {
+ struct pcap_if *next;
+ char *name; /* name to hand to "pcap_open_live()" */
+ char *description; /* textual description of interface, or NULL */
+ struct pcap_addr *addresses;
+ uint32_t flags; /* PCAP_IF_ interface flags */
+};
+
+/*
+ * We do not support stats (yet)
+ */
+struct pcap_stat {
+ u_int ps_recv; /* number of packets received */
+ u_int ps_drop; /* number of packets dropped */
+ u_int ps_ifdrop; /* drops by interface XXX not yet supported */
+#ifdef WIN32
+ u_int bs_capt; /* number of packets that reach the app. */
+#endif /* WIN32 */
+};
+
+typedef void pcap_t;
+typedef enum {
+ PCAP_D_INOUT = 0,
+ PCAP_D_IN,
+ PCAP_D_OUT
+} pcap_direction_t;
+
+
+
+typedef void (*pcap_handler)(u_char *user,
+ const struct pcap_pkthdr *h, const u_char *bytes);
+
+char errbuf[PCAP_ERRBUF_SIZE];
+
+pcap_t *pcap_open_live(const char *device, int snaplen,
+ int promisc, int to_ms, char *errbuf);
+
+int pcap_findalldevs(pcap_if_t **alldevsp, char *errbuf);
+void pcap_close(pcap_t *p);
+int pcap_get_selectable_fd(pcap_t *p);
+int pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user);
+int pcap_setnonblock(pcap_t *p, int nonblock, char *errbuf);
+int pcap_setdirection(pcap_t *p, pcap_direction_t d);
+char *pcap_lookupdev(char *errbuf);
+int pcap_inject(pcap_t *p, const void *buf, size_t size);
+int pcap_fileno(pcap_t *p);
+
+struct eproto {
+ const char *s;
+ u_short p;
+};
+#endif /* !PCAP_ERRBUF_SIZE */
+
+#ifdef __PIC__
+/*
+ * build as a shared library
+ */
+
+char pcap_version[] = "libnetmap version 0.3";
+
+/*
+ * Our equivalent of pcap_t
+ */
+struct my_ring {
+ struct nmreq nmr;
+
+ int fd;
+ char *mem; /* userspace mmap address */
+ u_int memsize;
+ u_int queueid;
+ u_int begin, end; /* first..last+1 rings to check */
+ struct netmap_if *nifp;
+
+ int snaplen;
+ char *errbuf;
+ int promisc;
+ int to_ms;
+
+ struct pcap_pkthdr hdr;
+
+ uint32_t if_flags;
+ uint32_t if_reqcap;
+ uint32_t if_curcap;
+
+ struct pcap_stat st;
+
+ char msg[PCAP_ERRBUF_SIZE];
+};
+
+
+static int
+do_ioctl(struct my_ring *me, int what)
+{
+ struct ifreq ifr;
+ int error;
+
+ bzero(&ifr, sizeof(ifr));
+ strncpy(ifr.ifr_name, me->nmr.nr_name, sizeof(ifr.ifr_name));
+ switch (what) {
+ case SIOCSIFFLAGS:
+ D("call SIOCSIFFLAGS 0x%x", me->if_flags);
+ ifr.ifr_flagshigh = (me->if_flags >> 16) & 0xffff;
+ ifr.ifr_flags = me->if_flags & 0xffff;
+ break;
+ case SIOCSIFCAP:
+ ifr.ifr_reqcap = me->if_reqcap;
+ ifr.ifr_curcap = me->if_curcap;
+ break;
+ }
+ error = ioctl(me->fd, what, &ifr);
+ if (error) {
+ D("ioctl 0x%x error %d", what, error);
+ return error;
+ }
+ switch (what) {
+ case SIOCSIFFLAGS:
+ case SIOCGIFFLAGS:
+ me->if_flags = (ifr.ifr_flagshigh << 16) |
+ (0xffff & ifr.ifr_flags);
+ D("flags are L 0x%x H 0x%x 0x%x",
+ (uint16_t)ifr.ifr_flags,
+ (uint16_t)ifr.ifr_flagshigh, me->if_flags);
+ break;
+
+ case SIOCGIFCAP:
+ me->if_reqcap = ifr.ifr_reqcap;
+ me->if_curcap = ifr.ifr_curcap;
+ D("curcap are 0x%x", me->if_curcap);
+ break;
+ }
+ return 0;
+}
+
+
+/*
+ * open a device. if me->mem is null then do an mmap.
+ */
+static int
+netmap_open(struct my_ring *me, int ringid)
+{
+ int fd, err, l;
+ u_int i;
+ struct nmreq req;
+
+ me->fd = fd = open("/dev/netmap", O_RDWR);
+ if (fd < 0) {
+ D("Unable to open /dev/netmap");
+ return (-1);
+ }
+ bzero(&req, sizeof(req));
+ strncpy(req.nr_name, me->nmr.nr_name, sizeof(req.nr_name));
+ req.nr_ringid = ringid;
+ err = ioctl(fd, NIOCGINFO, &req);
+ if (err) {
+ D("cannot get info on %s", me->nmr.nr_name);
+ goto error;
+ }
+ me->memsize = l = req.nr_memsize;
+ ND("memsize is %d MB", l>>20);
+ err = ioctl(fd, NIOCREGIF, &req);
+ if (err) {
+ D("Unable to register %s", me->nmr.nr_name);
+ goto error;
+ }
+
+ if (me->mem == NULL) {
+ me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
+ if (me->mem == MAP_FAILED) {
+ D("Unable to mmap");
+ me->mem = NULL;
+ goto error;
+ }
+ }
+
+ me->nifp = NETMAP_IF(me->mem, req.nr_offset);
+ me->queueid = ringid;
+ if (ringid & NETMAP_SW_RING) {
+ me->begin = req.nr_numrings;
+ me->end = me->begin + 1;
+ } else if (ringid & NETMAP_HW_RING) {
+ me->begin = ringid & NETMAP_RING_MASK;
+ me->end = me->begin + 1;
+ } else {
+ me->begin = 0;
+ me->end = req.nr_numrings;
+ }
+ /* request timestamps for packets */
+ for (i = me->begin; i < me->end; i++) {
+ struct netmap_ring *ring = NETMAP_RXRING(me->nifp, i);
+ ring->flags = NR_TIMESTAMP;
+ }
+ //me->tx = NETMAP_TXRING(me->nifp, 0);
+ return (0);
+error:
+ close(me->fd);
+ return -1;
+}
+
+/*
+ * There is a set of functions that tcpdump expects even if probably
+ * not used
+ */
+struct eproto eproto_db[] = {
+ { "ip", ETHERTYPE_IP },
+ { "arp", ETHERTYPE_ARP },
+ { (char *)0, 0 }
+};
+
+
+int
+pcap_findalldevs(pcap_if_t **alldevsp, __unused char *errbuf)
+{
+ struct ifaddrs *i_head, *i;
+ pcap_if_t *top = NULL, *cur;
+ struct pcap_addr *tail = NULL;
+ int l;
+
+ D("listing all devs");
+ *alldevsp = NULL;
+ i_head = NULL;
+
+ if (getifaddrs(&i_head)) {
+ D("cannot get if addresses");
+ return -1;
+ }
+ for (i = i_head; i; i = i->ifa_next) {
+ //struct ifaddrs *ifa;
+ struct pcap_addr *pca;
+ //struct sockaddr *sa;
+
+ D("got interface %s", i->ifa_name);
+ if (!top || strcmp(top->name, i->ifa_name)) {
+ /* new interface */
+ l = sizeof(*top) + strlen(i->ifa_name) + 1;
+ cur = calloc(1, l);
+ if (cur == NULL) {
+ D("no space for if descriptor");
+ continue;
+ }
+ cur->name = (char *)(cur + 1);
+ //cur->flags = i->ifa_flags;
+ strcpy(cur->name, i->ifa_name);
+ cur->description = NULL;
+ cur->next = top;
+ top = cur;
+ tail = NULL;
+ }
+ /* now deal with addresses */
+ D("%s addr family %d len %d %s %s",
+ top->name,
+ i->ifa_addr->sa_family, i->ifa_addr->sa_len,
+ i->ifa_netmask ? "Netmask" : "",
+ i->ifa_broadaddr ? "Broadcast" : "");
+ l = sizeof(struct pcap_addr) +
+ (i->ifa_addr ? i->ifa_addr->sa_len:0) +
+ (i->ifa_netmask ? i->ifa_netmask->sa_len:0) +
+ (i->ifa_broadaddr? i->ifa_broadaddr->sa_len:0);
+ pca = calloc(1, l);
+ if (pca == NULL) {
+ D("no space for if addr");
+ continue;
+ }
+#define SA_NEXT(x) ((struct sockaddr *)((char *)(x) + (x)->sa_len))
+ pca->addr = (struct sockaddr *)(pca + 1);
+ bcopy(i->ifa_addr, pca->addr, i->ifa_addr->sa_len);
+ if (i->ifa_netmask) {
+ pca->netmask = SA_NEXT(pca->addr);
+ bcopy(i->ifa_netmask, pca->netmask, i->ifa_netmask->sa_len);
+ if (i->ifa_broadaddr) {
+ pca->broadaddr = SA_NEXT(pca->netmask);
+ bcopy(i->ifa_broadaddr, pca->broadaddr, i->ifa_broadaddr->sa_len);
+ }
+ }
+ if (tail == NULL) {
+ top->addresses = pca;
+ } else {
+ tail->next = pca;
+ }
+ tail = pca;
+
+ }
+ freeifaddrs(i_head);
+ *alldevsp = top;
+ return 0;
+}
+
+void pcap_freealldevs(__unused pcap_if_t *alldevs)
+{
+ D("unimplemented");
+}
+
+char *
+pcap_lookupdev(char *buf)
+{
+ D("%s", buf);
+ strcpy(buf, "/dev/netmap");
+ return buf;
+}
+
+pcap_t *
+pcap_create(const char *source, char *errbuf)
+{
+ D("src %s (call open liveted)", source);
+ return pcap_open_live(source, 0, 1, 100, errbuf);
+}
+
+int
+pcap_activate(pcap_t *p)
+{
+ D("pcap %p running", p);
+ return 0;
+}
+
+int
+pcap_can_set_rfmon(__unused pcap_t *p)
+{
+ D("");
+ return 0; /* no we can't */
+}
+
+int
+pcap_set_snaplen(pcap_t *p, int snaplen)
+{
+ struct my_ring *me = p;
+
+ D("len %d", snaplen);
+ me->snaplen = snaplen;
+ return 0;
+}
+
+int
+pcap_snapshot(pcap_t *p)
+{
+ struct my_ring *me = p;
+
+ D("len %d", me->snaplen);
+ return me->snaplen;
+}
+
+int
+pcap_lookupnet(const char *device, uint32_t *netp,
+ uint32_t *maskp, __unused char *errbuf)
+{
+
+ D("device %s", device);
+ inet_aton("10.0.0.255", (struct in_addr *)netp);
+ inet_aton("255.255.255.0",(struct in_addr *) maskp);
+ return 0;
+}
+
+int
+pcap_set_promisc(pcap_t *p, int promisc)
+{
+ struct my_ring *me = p;
+
+ D("promisc %d", promisc);
+ if (do_ioctl(me, SIOCGIFFLAGS))
+ D("SIOCGIFFLAGS failed");
+ if (promisc) {
+ me->if_flags |= IFF_PPROMISC;
+ } else {
+ me->if_flags &= ~IFF_PPROMISC;
+ }
+ if (do_ioctl(me, SIOCSIFFLAGS))
+ D("SIOCSIFFLAGS failed");
+ return 0;
+}
+
+int
+pcap_set_timeout(pcap_t *p, int to_ms)
+{
+ struct my_ring *me = p;
+
+ D("%d ms", to_ms);
+ me->to_ms = to_ms;
+ return 0;
+}
+
+struct bpf_program;
+
+int
+pcap_compile(__unused pcap_t *p, __unused struct bpf_program *fp,
+ const char *str, __unused int optimize, __unused uint32_t netmask)
+{
+ D("%s", str);
+ return 0;
+}
+
+int
+pcap_setfilter(__unused pcap_t *p, __unused struct bpf_program *fp)
+{
+ D("");
+ return 0;
+}
+
+int
+pcap_datalink(__unused pcap_t *p)
+{
+ D("");
+ return 1; // ethernet
+}
+
+const char *
+pcap_datalink_val_to_name(int dlt)
+{
+ D("%d", dlt);
+ return "DLT_EN10MB";
+}
+
+const char *
+pcap_datalink_val_to_description(int dlt)
+{
+ D("%d", dlt);
+ return "Ethernet link";
+}
+
+struct pcap_stat;
+int
+pcap_stats(pcap_t *p, struct pcap_stat *ps)
+{
+ struct my_ring *me = p;
+ ND("");
+
+ me->st.ps_recv += 10;
+ *ps = me->st;
+ sprintf(me->msg, "stats not supported");
+ return -1;
+};
+
+char *
+pcap_geterr(pcap_t *p)
+{
+ struct my_ring *me = p;
+
+ D("");
+ return me->msg;
+}
+
+pcap_t *
+pcap_open_live(const char *device, __unused int snaplen,
+ int promisc, int to_ms, __unused char *errbuf)
+{
+ struct my_ring *me;
+
+ D("request to open %s", device);
+ me = calloc(1, sizeof(*me));
+ if (me == NULL) {
+ D("failed to allocate struct for %s", device);
+ return NULL;
+ }
+ strncpy(me->nmr.nr_name, device, sizeof(me->nmr.nr_name));
+ if (netmap_open(me, 0)) {
+ D("error opening %s", device);
+ free(me);
+ return NULL;
+ }
+ me->to_ms = to_ms;
+ if (do_ioctl(me, SIOCGIFFLAGS))
+ D("SIOCGIFFLAGS failed");
+ if (promisc) {
+ me->if_flags |= IFF_PPROMISC;
+ if (do_ioctl(me, SIOCSIFFLAGS))
+ D("SIOCSIFFLAGS failed");
+ }
+ if (do_ioctl(me, SIOCGIFCAP))
+ D("SIOCGIFCAP failed");
+ me->if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE);
+ if (do_ioctl(me, SIOCSIFCAP))
+ D("SIOCSIFCAP failed");
+
+ return (pcap_t *)me;
+}
+
+void
+pcap_close(pcap_t *p)
+{
+ struct my_ring *me = p;
+
+ D("");
+ if (!me)
+ return;
+ if (me->mem)
+ munmap(me->mem, me->memsize);
+ /* restore original flags ? */
+ ioctl(me->fd, NIOCUNREGIF, NULL);
+ close(me->fd);
+ bzero(me, sizeof(*me));
+ free(me);
+}
+
+int
+pcap_fileno(pcap_t *p)
+{
+ struct my_ring *me = p;
+ D("returns %d", me->fd);
+ return me->fd;
+}
+
+int
+pcap_get_selectable_fd(pcap_t *p)
+{
+ struct my_ring *me = p;
+
+ ND("");
+ return me->fd;
+}
+
+int
+pcap_setnonblock(__unused pcap_t *p, int nonblock, __unused char *errbuf)
+{
+ D("mode is %d", nonblock);
+ return 0; /* ignore */
+}
+
+int
+pcap_setdirection(__unused pcap_t *p, __unused pcap_direction_t d)
+{
+ D("");
+ return 0; /* ignore */
+};
+
+int
+pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
+{
+ struct my_ring *me = p;
+ int got = 0;
+ u_int si;
+
+ ND("cnt %d", cnt);
+ /* scan all rings */
+ for (si = me->begin; si < me->end; si++) {
+ struct netmap_ring *ring = NETMAP_RXRING(me->nifp, si);
+ ND("ring has %d pkts", ring->avail);
+ if (ring->avail == 0)
+ continue;
+ me->hdr.ts = ring->ts;
+ while ((cnt == -1 || cnt != got) && ring->avail > 0) {
+ u_int i = ring->cur;
+ u_int idx = ring->slot[i].buf_idx;
+ if (idx < 2) {
+ D("%s bogus RX index %d at offset %d",
+ me->nifp->ni_name, idx, i);
+ sleep(2);
+ }
+ u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
+ me->hdr.len = me->hdr.caplen = ring->slot[i].len;
+ // D("call %p len %d", p, me->hdr.len);
+ callback(user, &me->hdr, buf);
+ ring->cur = NETMAP_RING_NEXT(ring, i);
+ ring->avail--;
+ got++;
+ }
+ }
+ return got;
+}
+
+int
+pcap_inject(pcap_t *p, const void *buf, size_t size)
+{
+ struct my_ring *me = p;
+ u_int si;
+
+ ND("cnt %d", cnt);
+ /* scan all rings */
+ for (si = me->begin; si < me->end; si++) {
+ struct netmap_ring *ring = NETMAP_TXRING(me->nifp, si);
+
+ ND("ring has %d pkts", ring->avail);
+ if (ring->avail == 0)
+ continue;
+ u_int i = ring->cur;
+ u_int idx = ring->slot[i].buf_idx;
+ if (idx < 2) {
+ D("%s bogus TX index %d at offset %d",
+ me->nifp->ni_name, idx, i);
+ sleep(2);
+ }
+ u_char *dst = (u_char *)NETMAP_BUF(ring, idx);
+ ring->slot[i].len = size;
+ bcopy(buf, dst, size);
+ ring->cur = NETMAP_RING_NEXT(ring, i);
+ ring->avail--;
+ // if (ring->avail == 0) ioctl(me->fd, NIOCTXSYNC, NULL);
+ return size;
+ }
+ errno = ENOBUFS;
+ return -1;
+}
+
+int
+pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
+{
+ struct my_ring *me = p;
+ struct pollfd fds[1];
+ int i;
+
+ ND("cnt %d", cnt);
+ memset(fds, 0, sizeof(fds));
+ fds[0].fd = me->fd;
+ fds[0].events = (POLLIN);
+
+ while (cnt == -1 || cnt > 0) {
+ if (poll(fds, 1, me->to_ms) <= 0) {
+ D("poll error/timeout");
+ continue;
+ }
+ i = pcap_dispatch(p, cnt, callback, user);
+ if (cnt > 0)
+ cnt -= i;
+ }
+ return 0;
+}
+
+#endif /* __PIC__ */
+
+#ifndef __PIC__
+void do_send(u_char *user, const struct pcap_pkthdr *h, const u_char *buf)
+{
+ pcap_inject((pcap_t *)user, buf, h->caplen);
+}
+
+/*
+ * a simple pcap test program, bridge between two interfaces.
+ */
+int
+main(int argc, char **argv)
+{
+ pcap_t *p0, *p1;
+ int burst = 1024;
+ struct pollfd pollfd[2];
+
+ fprintf(stderr, "%s %s built %s %s\n",
+ argv[0], version, __DATE__, __TIME__);
+
+ while (argc > 1 && !strcmp(argv[1], "-v")) {
+ verbose++;
+ argv++;
+ argc--;
+ }
+
+ if (argc < 3 || argc > 4 || !strcmp(argv[1], argv[2])) {
+ D("Usage: %s IFNAME1 IFNAME2 [BURST]", argv[0]);
+ return (1);
+ }
+ if (argc > 3)
+ burst = atoi(argv[3]);
+
+ p0 = pcap_open_live(argv[1], 0, 1, 100, NULL);
+ p1 = pcap_open_live(argv[2], 0, 1, 100, NULL);
+ D("%s", version);
+ D("open returns %p %p", p0, p1);
+ if (!p0 || !p1)
+ return(1);
+ bzero(pollfd, sizeof(pollfd));
+ pollfd[0].fd = pcap_fileno(p0);
+ pollfd[1].fd = pcap_fileno(p1);
+ pollfd[0].events = pollfd[1].events = POLLIN;
+ for (;;) {
+ /* do i need to reset ? */
+ pollfd[0].revents = pollfd[1].revents = 0;
+ int ret = poll(pollfd, 2, 1000);
+ if (ret <= 0 || verbose)
+ D("poll %s [0] ev %x %x [1] ev %x %x",
+ ret <= 0 ? "timeout" : "ok",
+ pollfd[0].events,
+ pollfd[0].revents,
+ pollfd[1].events,
+ pollfd[1].revents);
+ if (ret < 0)
+ continue;
+ if (pollfd[0].revents & POLLIN)
+ pcap_dispatch(p0, burst, do_send, p1);
+ if (pollfd[1].revents & POLLIN)
+ pcap_dispatch(p1, burst, do_send, p0);
+ }
+
+ return (0);
+}
+#endif /* !__PIC__ */
diff --git a/tools/tools/netmap/pkt-gen.c b/tools/tools/netmap/pkt-gen.c
new file mode 100644
index 0000000..747bd9d
--- /dev/null
+++ b/tools/tools/netmap/pkt-gen.c
@@ -0,0 +1,1021 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: pkt-gen.c 9638 2011-11-07 18:07:43Z luigi $
+ *
+ * Example program to show how to build a multithreaded packet
+ * source/sink using the netmap device.
+ *
+ * In this example we create a programmable number of threads
+ * to take care of all the queues of the interface used to
+ * send or receive traffic.
+ *
+ */
+
+const char *default_payload="netmap pkt-gen Luigi Rizzo and Matteo Landi\n"
+ "http://info.iet.unipi.it/~luigi/netmap/ ";
+
+#include <errno.h>
+#include <pthread.h> /* pthread_* */
+#include <pthread_np.h> /* pthread w/ affinity */
+#include <signal.h> /* signal */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h> /* strcmp */
+#include <fcntl.h> /* open */
+#include <unistd.h> /* close */
+#include <ifaddrs.h> /* getifaddrs */
+
+#include <sys/mman.h> /* PROT_* */
+#include <sys/ioctl.h> /* ioctl */
+#include <sys/poll.h>
+#include <sys/socket.h> /* sockaddr.. */
+#include <arpa/inet.h> /* ntohs */
+#include <sys/param.h>
+#include <sys/cpuset.h> /* cpu_set */
+#include <sys/sysctl.h> /* sysctl */
+#include <sys/time.h> /* timersub */
+
+#include <net/ethernet.h>
+#include <net/if.h> /* ifreq */
+#include <net/if_dl.h> /* LLADDR */
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+
+#include <net/netmap.h>
+#include <net/netmap_user.h>
+#include <pcap/pcap.h>
+
+
+static inline int min(int a, int b) { return a < b ? a : b; }
+
+/* debug support */
+#define D(format, ...) \
+ fprintf(stderr, "%s [%d] " format "\n", \
+ __FUNCTION__, __LINE__, ##__VA_ARGS__)
+
+#ifndef EXPERIMENTAL
+#define EXPERIMENTAL 0
+#endif
+
+int verbose = 0;
+#define MAX_QUEUES 64 /* no need to limit */
+
+#define SKIP_PAYLOAD 1 /* do not check payload. */
+
+#if EXPERIMENTAL
+/* Wrapper around `rdtsc' to take reliable timestamps flushing the pipeline */
+#define netmap_rdtsc(t) \
+ do { \
+ u_int __regs[4]; \
+ \
+ do_cpuid(0, __regs); \
+ (t) = rdtsc(); \
+ } while (0)
+
+static __inline void
+do_cpuid(u_int ax, u_int *p)
+{
+ __asm __volatile("cpuid"
+ : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
+ : "0" (ax));
+}
+
+static __inline uint64_t
+rdtsc(void)
+{
+ uint64_t rv;
+
+ __asm __volatile("rdtsc" : "=A" (rv));
+ return (rv);
+}
+#define MAX_SAMPLES 100000
+#endif /* EXPERIMENTAL */
+
+
+struct pkt {
+ struct ether_header eh;
+ struct ip ip;
+ struct udphdr udp;
+ uint8_t body[NETMAP_BUF_SIZE];
+} __attribute__((__packed__));
+
+/*
+ * global arguments for all threads
+ */
+struct glob_arg {
+ const char *src_ip;
+ const char *dst_ip;
+ const char *src_mac;
+ const char *dst_mac;
+ int pkt_size;
+ int burst;
+ int npackets; /* total packets to send */
+ int nthreads;
+ int cpus;
+ int use_pcap;
+ pcap_t *p;
+};
+
+struct mystat {
+ uint64_t containers[8];
+};
+
+/*
+ * Arguments for a new thread. The same structure is used by
+ * the source and the sink
+ */
+struct targ {
+ struct glob_arg *g;
+ int used;
+ int completed;
+ int fd;
+ struct nmreq nmr;
+ struct netmap_if *nifp;
+ uint16_t qfirst, qlast; /* range of queues to scan */
+ uint64_t count;
+ struct timeval tic, toc;
+ int me;
+ pthread_t thread;
+ int affinity;
+
+ uint8_t dst_mac[6];
+ uint8_t src_mac[6];
+ u_int dst_mac_range;
+ u_int src_mac_range;
+ uint32_t dst_ip;
+ uint32_t src_ip;
+ u_int dst_ip_range;
+ u_int src_ip_range;
+
+ struct pkt pkt;
+};
+
+
+static struct targ *targs;
+static int global_nthreads;
+
+/* control-C handler */
+static void
+sigint_h(__unused int sig)
+{
+ for (int i = 0; i < global_nthreads; i++) {
+ /* cancel active threads. */
+ if (targs[i].used == 0)
+ continue;
+
+ D("Cancelling thread #%d\n", i);
+ pthread_cancel(targs[i].thread);
+ targs[i].used = 0;
+ }
+
+ signal(SIGINT, SIG_DFL);
+}
+
+
+/* sysctl wrapper to return the number of active CPUs */
+static int
+system_ncpus(void)
+{
+ int mib[2], ncpus;
+ size_t len;
+
+ mib[0] = CTL_HW;
+ mib[1] = HW_NCPU;
+ len = sizeof(mib);
+ sysctl(mib, 2, &ncpus, &len, NULL, 0);
+
+ return (ncpus);
+}
+
+/*
+ * locate the src mac address for our interface, put it
+ * into the user-supplied buffer. return 0 if ok, -1 on error.
+ */
+static int
+source_hwaddr(const char *ifname, char *buf)
+{
+ struct ifaddrs *ifaphead, *ifap;
+ int l = sizeof(ifap->ifa_name);
+
+ if (getifaddrs(&ifaphead) != 0) {
+ D("getifaddrs %s failed", ifname);
+ return (-1);
+ }
+
+ for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) {
+ struct sockaddr_dl *sdl =
+ (struct sockaddr_dl *)ifap->ifa_addr;
+ uint8_t *mac;
+
+ if (!sdl || sdl->sdl_family != AF_LINK)
+ continue;
+ if (strncmp(ifap->ifa_name, ifname, l) != 0)
+ continue;
+ mac = (uint8_t *)LLADDR(sdl);
+ sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x",
+ mac[0], mac[1], mac[2],
+ mac[3], mac[4], mac[5]);
+ if (verbose)
+ D("source hwaddr %s", buf);
+ break;
+ }
+ freeifaddrs(ifaphead);
+ return ifap ? 0 : 1;
+}
+
+
+/* set the thread affinity. */
+static int
+setaffinity(pthread_t me, int i)
+{
+ cpuset_t cpumask;
+
+ if (i == -1)
+ return 0;
+
+ /* Set thread affinity affinity.*/
+ CPU_ZERO(&cpumask);
+ CPU_SET(i, &cpumask);
+
+ if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) {
+ D("Unable to set affinity");
+ return 1;
+ }
+ return 0;
+}
+
+/* Compute the checksum of the given ip header. */
+static uint16_t
+checksum(const void *data, uint16_t len)
+{
+ const uint8_t *addr = data;
+ uint32_t sum = 0;
+
+ while (len > 1) {
+ sum += addr[0] * 256 + addr[1];
+ addr += 2;
+ len -= 2;
+ }
+
+ if (len == 1)
+ sum += *addr * 256;
+
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16);
+
+ sum = htons(sum);
+
+ return ~sum;
+}
+
+/*
+ * Fill a packet with some payload.
+ */
+static void
+initialize_packet(struct targ *targ)
+{
+ struct pkt *pkt = &targ->pkt;
+ struct ether_header *eh;
+ struct ip *ip;
+ struct udphdr *udp;
+ uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(*ip);
+ int i, l, l0 = strlen(default_payload);
+ char *p;
+
+ for (i = 0; i < paylen;) {
+ l = min(l0, paylen - i);
+ bcopy(default_payload, pkt->body + i, l);
+ i += l;
+ }
+ pkt->body[i-1] = '\0';
+
+ udp = &pkt->udp;
+ udp->uh_sport = htons(1234);
+ udp->uh_dport = htons(4321);
+ udp->uh_ulen = htons(paylen);
+ udp->uh_sum = 0; // checksum(udp, sizeof(*udp));
+
+ ip = &pkt->ip;
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = 5;
+ ip->ip_id = 0;
+ ip->ip_tos = IPTOS_LOWDELAY;
+ ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh));
+ ip->ip_id = 0;
+ ip->ip_off = htons(IP_DF); /* Don't fragment */
+ ip->ip_ttl = IPDEFTTL;
+ ip->ip_p = IPPROTO_UDP;
+ inet_aton(targ->g->src_ip, (struct in_addr *)&ip->ip_src);
+ inet_aton(targ->g->dst_ip, (struct in_addr *)&ip->ip_dst);
+ targ->dst_ip = ip->ip_dst.s_addr;
+ targ->src_ip = ip->ip_src.s_addr;
+ p = index(targ->g->src_ip, '-');
+ if (p) {
+ targ->dst_ip_range = atoi(p+1);
+ D("dst-ip sweep %d addresses", targ->dst_ip_range);
+ }
+ ip->ip_sum = checksum(ip, sizeof(*ip));
+
+ eh = &pkt->eh;
+ bcopy(ether_aton(targ->g->src_mac), targ->src_mac, 6);
+ bcopy(targ->src_mac, eh->ether_shost, 6);
+ p = index(targ->g->src_mac, '-');
+ if (p)
+ targ->src_mac_range = atoi(p+1);
+
+ bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6);
+ bcopy(targ->dst_mac, eh->ether_dhost, 6);
+ p = index(targ->g->dst_mac, '-');
+ if (p)
+ targ->dst_mac_range = atoi(p+1);
+ eh->ether_type = htons(ETHERTYPE_IP);
+}
+
+/* Check the payload of the packet for errors (use it for debug).
+ * Look for consecutive ascii representations of the size of the packet.
+ */
+static void
+check_payload(char *p, int psize)
+{
+ char temp[64];
+ int n_read, size, sizelen;
+
+ /* get the length in ASCII of the length of the packet. */
+ sizelen = sprintf(temp, "%d", psize) + 1; // include a whitespace
+
+ /* dummy payload. */
+ p += 14; /* skip packet header. */
+ n_read = 14;
+ while (psize - n_read >= sizelen) {
+ sscanf(p, "%d", &size);
+ if (size != psize) {
+ D("Read %d instead of %d", size, psize);
+ break;
+ }
+
+ p += sizelen;
+ n_read += sizelen;
+ }
+}
+
+
+/*
+ * create and enqueue a batch of packets on a ring.
+ * On the last one set NS_REPORT to tell the driver to generate
+ * an interrupt when done.
+ */
+static int
+send_packets(struct netmap_ring *ring, struct pkt *pkt,
+ int size, u_int count, int fill_all)
+{
+ u_int sent, cur = ring->cur;
+
+ if (ring->avail < count)
+ count = ring->avail;
+
+ for (sent = 0; sent < count; sent++) {
+ struct netmap_slot *slot = &ring->slot[cur];
+ char *p = NETMAP_BUF(ring, slot->buf_idx);
+
+ if (fill_all)
+ memcpy(p, pkt, size);
+
+ slot->len = size;
+ if (sent == count - 1)
+ slot->flags |= NS_REPORT;
+ cur = NETMAP_RING_NEXT(ring, cur);
+ }
+ ring->avail -= sent;
+ ring->cur = cur;
+
+ return (sent);
+}
+
+static void *
+sender_body(void *data)
+{
+ struct targ *targ = (struct targ *) data;
+
+ struct pollfd fds[1];
+ struct netmap_if *nifp = targ->nifp;
+ struct netmap_ring *txring;
+ int i, n = targ->g->npackets / targ->g->nthreads, sent = 0;
+ int fill_all = 1;
+
+ if (setaffinity(targ->thread, targ->affinity))
+ goto quit;
+ /* setup poll(2) machanism. */
+ memset(fds, 0, sizeof(fds));
+ fds[0].fd = targ->fd;
+ fds[0].events = (POLLOUT);
+
+ /* main loop.*/
+ gettimeofday(&targ->tic, NULL);
+ if (targ->g->use_pcap) {
+ int size = targ->g->pkt_size;
+ void *pkt = &targ->pkt;
+ pcap_t *p = targ->g->p;
+
+ for (; sent < n; sent++) {
+ if (pcap_inject(p, pkt, size) == -1)
+ break;
+ }
+ } else {
+ while (sent < n) {
+
+ /*
+ * wait for available room in the send queue(s)
+ */
+ if (poll(fds, 1, 2000) <= 0) {
+ D("poll error/timeout on queue %d\n", targ->me);
+ goto quit;
+ }
+ /*
+ * scan our queues and send on those with room
+ */
+ if (sent > 100000)
+ fill_all = 0;
+ for (i = targ->qfirst; i < targ->qlast; i++) {
+ int m, limit = MIN(n - sent, targ->g->burst);
+
+ txring = NETMAP_TXRING(nifp, i);
+ if (txring->avail == 0)
+ continue;
+ m = send_packets(txring, &targ->pkt, targ->g->pkt_size,
+ limit, fill_all);
+ sent += m;
+ targ->count = sent;
+ }
+ }
+ /* Tell the interface that we have new packets. */
+ ioctl(fds[0].fd, NIOCTXSYNC, NULL);
+
+ /* final part: wait all the TX queues to be empty. */
+ for (i = targ->qfirst; i < targ->qlast; i++) {
+ txring = NETMAP_TXRING(nifp, i);
+ while (!NETMAP_TX_RING_EMPTY(txring)) {
+ ioctl(fds[0].fd, NIOCTXSYNC, NULL);
+ usleep(1); /* wait 1 tick */
+ }
+ }
+ }
+
+ gettimeofday(&targ->toc, NULL);
+ targ->completed = 1;
+ targ->count = sent;
+
+quit:
+ /* reset the ``used`` flag. */
+ targ->used = 0;
+
+ return (NULL);
+}
+
+
+static void
+receive_pcap(u_char *user, __unused const struct pcap_pkthdr * h,
+ __unused const u_char * bytes)
+{
+ int *count = (int *)user;
+ (*count)++;
+}
+
+static int
+receive_packets(struct netmap_ring *ring, u_int limit, int skip_payload)
+{
+ u_int cur, rx;
+
+ cur = ring->cur;
+ if (ring->avail < limit)
+ limit = ring->avail;
+ for (rx = 0; rx < limit; rx++) {
+ struct netmap_slot *slot = &ring->slot[cur];
+ char *p = NETMAP_BUF(ring, slot->buf_idx);
+
+ if (!skip_payload)
+ check_payload(p, slot->len);
+
+ cur = NETMAP_RING_NEXT(ring, cur);
+ }
+ ring->avail -= rx;
+ ring->cur = cur;
+
+ return (rx);
+}
+
+static void *
+receiver_body(void *data)
+{
+ struct targ *targ = (struct targ *) data;
+ struct pollfd fds[1];
+ struct netmap_if *nifp = targ->nifp;
+ struct netmap_ring *rxring;
+ int i, received = 0;
+
+ if (setaffinity(targ->thread, targ->affinity))
+ goto quit;
+
+ /* setup poll(2) machanism. */
+ memset(fds, 0, sizeof(fds));
+ fds[0].fd = targ->fd;
+ fds[0].events = (POLLIN);
+
+ /* unbounded wait for the first packet. */
+ for (;;) {
+ i = poll(fds, 1, 1000);
+ if (i > 0 && !(fds[0].revents & POLLERR))
+ break;
+ D("waiting for initial packets, poll returns %d %d", i, fds[0].revents);
+ }
+
+ /* main loop, exit after 1s silence */
+ gettimeofday(&targ->tic, NULL);
+ if (targ->g->use_pcap) {
+ for (;;) {
+ pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL);
+ }
+ } else {
+ while (1) {
+ /* Once we started to receive packets, wait at most 1 seconds
+ before quitting. */
+ if (poll(fds, 1, 1 * 1000) <= 0) {
+ gettimeofday(&targ->toc, NULL);
+ targ->toc.tv_sec -= 1; /* Substract timeout time. */
+ break;
+ }
+
+ for (i = targ->qfirst; i < targ->qlast; i++) {
+ int m;
+
+ rxring = NETMAP_RXRING(nifp, i);
+ if (rxring->avail == 0)
+ continue;
+
+ m = receive_packets(rxring, targ->g->burst,
+ SKIP_PAYLOAD);
+ received += m;
+ targ->count = received;
+ }
+
+ // tell the card we have read the data
+ //ioctl(fds[0].fd, NIOCRXSYNC, NULL);
+ }
+ }
+
+ targ->completed = 1;
+ targ->count = received;
+
+quit:
+ /* reset the ``used`` flag. */
+ targ->used = 0;
+
+ return (NULL);
+}
+
+static void
+tx_output(uint64_t sent, int size, double delta)
+{
+ double amount = 8.0 * (1.0 * size * sent) / delta;
+ double pps = sent / delta;
+ char units[4] = { '\0', 'K', 'M', 'G' };
+ int aunit = 0, punit = 0;
+
+ while (amount >= 1000) {
+ amount /= 1000;
+ aunit += 1;
+ }
+ while (pps >= 1000) {
+ pps /= 1000;
+ punit += 1;
+ }
+
+ printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n",
+ sent, size, delta);
+ printf("Speed: %.2f%cpps. Bandwidth: %.2f%cbps.\n",
+ pps, units[punit], amount, units[aunit]);
+}
+
+
+static void
+rx_output(uint64_t received, double delta)
+{
+
+ double pps = received / delta;
+ char units[4] = { '\0', 'K', 'M', 'G' };
+ int punit = 0;
+
+ while (pps >= 1000) {
+ pps /= 1000;
+ punit += 1;
+ }
+
+ printf("Received %llu packets, in %.2f seconds.\n", received, delta);
+ printf("Speed: %.2f%cpps.\n", pps, units[punit]);
+}
+
+static void
+usage(void)
+{
+ const char *cmd = "pkt-gen";
+ fprintf(stderr,
+ "Usage:\n"
+ "%s arguments\n"
+ "\t-i interface interface name\n"
+ "\t-t pkts_to_send also forces send mode\n"
+ "\t-r pkts_to_receive also forces receive mode\n"
+ "\t-l pkts_size in bytes excluding CRC\n"
+ "\t-d dst-ip end with %%n to sweep n addresses\n"
+ "\t-s src-ip end with %%n to sweep n addresses\n"
+ "\t-D dst-mac end with %%n to sweep n addresses\n"
+ "\t-S src-mac end with %%n to sweep n addresses\n"
+ "\t-b burst size testing, mostly\n"
+ "\t-c cores cores to use\n"
+ "\t-p threads processes/threads to use\n"
+ "\t-T report_ms milliseconds between reports\n"
+ "\t-w wait_for_link_time in seconds\n"
+ "",
+ cmd);
+
+ exit(0);
+}
+
+
+int
+main(int arc, char **argv)
+{
+ int i, fd;
+
+ struct glob_arg g;
+
+ struct nmreq nmr;
+ void *mmap_addr; /* the mmap address */
+ void *(*td_body)(void *) = receiver_body;
+ int ch;
+ int report_interval = 1000; /* report interval */
+ char *ifname = NULL;
+ int wait_link = 2;
+ int devqueues = 1; /* how many device queues */
+
+ bzero(&g, sizeof(g));
+
+ g.src_ip = "10.0.0.1";
+ g.dst_ip = "10.1.0.1";
+ g.dst_mac = "ff:ff:ff:ff:ff:ff";
+ g.src_mac = NULL;
+ g.pkt_size = 60;
+ g.burst = 512; // default
+ g.nthreads = 1;
+ g.cpus = 1;
+
+ while ( (ch = getopt(arc, argv,
+ "i:t:r:l:d:s:D:S:b:c:p:T:w:v")) != -1) {
+ switch(ch) {
+ default:
+ D("bad option %c %s", ch, optarg);
+ usage();
+ break;
+ case 'i': /* interface */
+ ifname = optarg;
+ break;
+ case 't': /* send */
+ td_body = sender_body;
+ g.npackets = atoi(optarg);
+ break;
+ case 'r': /* receive */
+ td_body = receiver_body;
+ g.npackets = atoi(optarg);
+ break;
+ case 'l': /* pkt_size */
+ g.pkt_size = atoi(optarg);
+ break;
+ case 'd':
+ g.dst_ip = optarg;
+ break;
+ case 's':
+ g.src_ip = optarg;
+ break;
+ case 'T': /* report interval */
+ report_interval = atoi(optarg);
+ break;
+ case 'w':
+ wait_link = atoi(optarg);
+ break;
+ case 'b': /* burst */
+ g.burst = atoi(optarg);
+ break;
+ case 'c':
+ g.cpus = atoi(optarg);
+ break;
+ case 'p':
+ g.nthreads = atoi(optarg);
+ break;
+
+ case 'P':
+ g.use_pcap = 1;
+ break;
+
+ case 'D': /* destination mac */
+ g.dst_mac = optarg;
+ {
+ struct ether_addr *mac = ether_aton(g.dst_mac);
+ D("ether_aton(%s) gives %p", g.dst_mac, mac);
+ }
+ break;
+ case 'S': /* source mac */
+ g.src_mac = optarg;
+ break;
+ case 'v':
+ verbose++;
+ }
+ }
+
+ if (ifname == NULL) {
+ D("missing ifname");
+ usage();
+ }
+ {
+ int n = system_ncpus();
+ if (g.cpus < 0 || g.cpus > n) {
+ D("%d cpus is too high, have only %d cpus", g.cpus, n);
+ usage();
+ }
+ if (g.cpus == 0)
+ g.cpus = n;
+ }
+ if (g.pkt_size < 16 || g.pkt_size > 1536) {
+ D("bad pktsize %d\n", g.pkt_size);
+ usage();
+ }
+
+ bzero(&nmr, sizeof(nmr));
+ /*
+ * Open the netmap device to fetch the number of queues of our
+ * interface.
+ *
+ * The first NIOCREGIF also detaches the card from the
+ * protocol stack and may cause a reset of the card,
+ * which in turn may take some time for the PHY to
+ * reconfigure.
+ */
+ fd = open("/dev/netmap", O_RDWR);
+ if (fd == -1) {
+ D("Unable to open /dev/netmap");
+ // fail later
+ } else {
+ if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) {
+ D("Unable to get if info without name");
+ } else {
+ D("map size is %d Kb", nmr.nr_memsize >> 10);
+ }
+ bzero(&nmr, sizeof(nmr));
+ strncpy(nmr.nr_name, ifname, sizeof(nmr.nr_name));
+ if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) {
+ D("Unable to get if info for %s", ifname);
+ }
+ devqueues = nmr.nr_numrings;
+ }
+
+ /* validate provided nthreads. */
+ if (g.nthreads < 1 || g.nthreads > devqueues) {
+ D("bad nthreads %d, have %d queues", g.nthreads, devqueues);
+ // continue, fail later
+ }
+
+ if (td_body == sender_body && g.src_mac == NULL) {
+ static char mybuf[20] = "ff:ff:ff:ff:ff:ff";
+ /* retrieve source mac address. */
+ if (source_hwaddr(ifname, mybuf) == -1) {
+ D("Unable to retrieve source mac");
+ // continue, fail later
+ }
+ g.src_mac = mybuf;
+ }
+
+ /*
+ * Map the netmap shared memory: instead of issuing mmap()
+ * inside the body of the threads, we prefer to keep this
+ * operation here to simplify the thread logic.
+ */
+ D("mmapping %d Kbytes", nmr.nr_memsize>>10);
+ mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize,
+ PROT_WRITE | PROT_READ,
+ MAP_SHARED, fd, 0);
+ if (mmap_addr == MAP_FAILED) {
+ D("Unable to mmap %d KB", nmr.nr_memsize >> 10);
+ // continue, fail later
+ }
+
+ /*
+ * Register the interface on the netmap device: from now on,
+ * we can operate on the network interface without any
+ * interference from the legacy network stack.
+ *
+ * We decide to put the first interface registration here to
+ * give time to cards that take a long time to reset the PHY.
+ */
+ if (ioctl(fd, NIOCREGIF, &nmr) == -1) {
+ D("Unable to register interface %s", ifname);
+ //continue, fail later
+ }
+
+
+ /* Print some debug information. */
+ fprintf(stdout,
+ "%s %s: %d queues, %d threads and %d cpus.\n",
+ (td_body == sender_body) ? "Sending on" : "Receiving from",
+ ifname,
+ devqueues,
+ g.nthreads,
+ g.cpus);
+ if (td_body == sender_body) {
+ fprintf(stdout, "%s -> %s (%s -> %s)\n",
+ g.src_ip, g.dst_ip,
+ g.src_mac, g.dst_mac);
+ }
+
+ /* Exit if something went wrong. */
+ if (fd < 0) {
+ D("aborting");
+ usage();
+ }
+
+
+ /* Wait for PHY reset. */
+ D("Wait %d secs for phy reset", wait_link);
+ sleep(wait_link);
+ D("Ready...");
+
+ /* Install ^C handler. */
+ global_nthreads = g.nthreads;
+ signal(SIGINT, sigint_h);
+
+ if (g.use_pcap) {
+ // XXX g.p = pcap_open_live(..);
+ }
+
+ targs = calloc(g.nthreads, sizeof(*targs));
+ /*
+ * Now create the desired number of threads, each one
+ * using a single descriptor.
+ */
+ for (i = 0; i < g.nthreads; i++) {
+ struct netmap_if *tnifp;
+ struct nmreq tifreq;
+ int tfd;
+
+ if (g.use_pcap) {
+ tfd = -1;
+ tnifp = NULL;
+ } else {
+ /* register interface. */
+ tfd = open("/dev/netmap", O_RDWR);
+ if (tfd == -1) {
+ D("Unable to open /dev/netmap");
+ continue;
+ }
+
+ bzero(&tifreq, sizeof(tifreq));
+ strncpy(tifreq.nr_name, ifname, sizeof(tifreq.nr_name));
+ tifreq.nr_ringid = (g.nthreads > 1) ? (i | NETMAP_HW_RING) : 0;
+
+ /*
+ * if we are acting as a receiver only, do not touch the transmit ring.
+ * This is not the default because many apps may use the interface
+ * in both directions, but a pure receiver does not.
+ */
+ if (td_body == receiver_body) {
+ tifreq.nr_ringid |= NETMAP_NO_TX_POLL;
+ }
+
+ if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) {
+ D("Unable to register %s", ifname);
+ continue;
+ }
+ tnifp = NETMAP_IF(mmap_addr, tifreq.nr_offset);
+ }
+ /* start threads. */
+ bzero(&targs[i], sizeof(targs[i]));
+ targs[i].g = &g;
+ targs[i].used = 1;
+ targs[i].completed = 0;
+ targs[i].fd = tfd;
+ targs[i].nmr = tifreq;
+ targs[i].nifp = tnifp;
+ targs[i].qfirst = (g.nthreads > 1) ? i : 0;
+ targs[i].qlast = (g.nthreads > 1) ? i+1 : tifreq.nr_numrings;
+ targs[i].me = i;
+ targs[i].affinity = g.cpus ? i % g.cpus : -1;
+ if (td_body == sender_body) {
+ /* initialize the packet to send. */
+ initialize_packet(&targs[i]);
+ }
+
+ if (pthread_create(&targs[i].thread, NULL, td_body,
+ &targs[i]) == -1) {
+ D("Unable to create thread %d", i);
+ targs[i].used = 0;
+ }
+ }
+
+ {
+ uint64_t my_count = 0, prev = 0;
+ uint64_t count = 0;
+ double delta_t;
+ struct timeval tic, toc;
+
+ gettimeofday(&toc, NULL);
+ for (;;) {
+ struct timeval now, delta;
+ uint64_t pps;
+ int done = 0;
+
+ delta.tv_sec = report_interval/1000;
+ delta.tv_usec = (report_interval%1000)*1000;
+ select(0, NULL, NULL, NULL, &delta);
+ gettimeofday(&now, NULL);
+ timersub(&now, &toc, &toc);
+ my_count = 0;
+ for (i = 0; i < g.nthreads; i++) {
+ my_count += targs[i].count;
+ if (targs[i].used == 0)
+ done++;
+ }
+ pps = toc.tv_sec* 1000000 + toc.tv_usec;
+ if (pps < 10000)
+ continue;
+ pps = (my_count - prev)*1000000 / pps;
+ D("%llu pps", pps);
+ prev = my_count;
+ toc = now;
+ if (done == g.nthreads)
+ break;
+ }
+
+ timerclear(&tic);
+ timerclear(&toc);
+ for (i = 0; i < g.nthreads; i++) {
+ /*
+ * Join active threads, unregister interfaces and close
+ * file descriptors.
+ */
+ pthread_join(targs[i].thread, NULL);
+ ioctl(targs[i].fd, NIOCUNREGIF, &targs[i].nmr);
+ close(targs[i].fd);
+
+ if (targs[i].completed == 0)
+ continue;
+
+ /*
+ * Collect threads o1utput and extract information about
+ * how log it took to send all the packets.
+ */
+ count += targs[i].count;
+ if (!timerisset(&tic) || timercmp(&targs[i].tic, &tic, <))
+ tic = targs[i].tic;
+ if (!timerisset(&toc) || timercmp(&targs[i].toc, &toc, >))
+ toc = targs[i].toc;
+ }
+
+ /* print output. */
+ timersub(&toc, &tic, &toc);
+ delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
+ if (td_body == sender_body)
+ tx_output(count, g.pkt_size, delta_t);
+ else
+ rx_output(count, delta_t);
+ }
+
+ ioctl(fd, NIOCUNREGIF, &nmr);
+ munmap(mmap_addr, nmr.nr_memsize);
+ close(fd);
+
+ return (0);
+}
+/* end of file */
OpenPOWER on IntegriCloud