summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: luigi <luigi@FreeBSD.org> 2013-02-17 04:43:22 +0000
committer: luigi <luigi@FreeBSD.org> 2013-02-17 04:43:22 +0000
commit: 1a47ce19ef32d3b258cd99730e59e33cb2435bc1 (patch)
tree: 0526afe8497c9b0ae15e46aa781d71cabced4ce3
parent: 67d28f7aa549ac503535c4f0c06a55fb6e7a1a63 (diff)
download: FreeBSD-src-1a47ce19ef32d3b258cd99730e59e33cb2435bc1.zip
FreeBSD-src-1a47ce19ef32d3b258cd99730e59e33cb2435bc1.tar.gz
Update the netmap example programs, merging some common code into nm_util.c.
pkt-gen now implements several functions (unlimited transmit, receive, ping-pong) and can operate on a 'tap' device.
-rw-r--r-- tools/tools/netmap/Makefile 9
-rw-r--r-- tools/tools/netmap/bridge.c 235
-rw-r--r-- tools/tools/netmap/nm_util.c 251
-rw-r--r-- tools/tools/netmap/nm_util.h 183
-rw-r--r-- tools/tools/netmap/pcap.c 337
-rw-r--r-- tools/tools/netmap/pkt-gen.c 1282
6 files changed, 1366 insertions, 931 deletions
diff --git a/tools/tools/netmap/Makefile b/tools/tools/netmap/Makefile
index 4b682e5..2593a27 100644
--- a/tools/tools/netmap/Makefile
+++ b/tools/tools/netmap/Makefile
@@ -5,7 +5,7 @@
# we can just define 'progs' and create custom targets.
PROGS = pkt-gen bridge testpcap libnetmap.so
-CLEANFILES = $(PROGS) pcap.o
+CLEANFILES = $(PROGS) pcap.o nm_util.o
NO_MAN=
CFLAGS += -Werror -Wall -nostdinc -I/usr/include -I../../../sys
CFLAGS += -Wextra
@@ -17,9 +17,12 @@ LDFLAGS += -lpthread -lpcap
all: $(PROGS)
+pkt-gen bridge: nm_util.o
+ $(CC) $(CFLAGS) -o ${.TARGET} ${.TARGET:=.c} nm_util.o $(LDFLAGS)
+
testpcap: pcap.c libnetmap.so
- $(CC) $(CFLAGS) -L. -lnetmap -o ${.TARGET} pcap.c
+ $(CC) $(CFLAGS) -DTEST -L. -lnetmap -o ${.TARGET} pcap.c
-libnetmap.so: pcap.c
+libnetmap.so: pcap.c nm_util.c
$(CC) $(CFLAGS) -fpic -c ${.ALLSRC}
$(CC) -shared -o ${.TARGET} ${.ALLSRC:.c=.o}
diff --git a/tools/tools/netmap/bridge.c b/tools/tools/netmap/bridge.c
index 0e9e442..473ee0c 100644
--- a/tools/tools/netmap/bridge.c
+++ b/tools/tools/netmap/bridge.c
@@ -9,195 +9,24 @@
* $FreeBSD$
*/
-#include <errno.h>
-#include <signal.h> /* signal */
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h> /* strcmp */
-#include <fcntl.h> /* open */
-#include <unistd.h> /* close */
+#include "nm_util.h"
-#include <sys/endian.h> /* le64toh */
-#include <sys/mman.h> /* PROT_* */
-#include <sys/ioctl.h> /* ioctl */
-#include <machine/param.h>
-#include <sys/poll.h>
-#include <sys/socket.h> /* sockaddr.. */
-#include <arpa/inet.h> /* ntohs */
-
-#include <net/if.h> /* ifreq */
-#include <net/ethernet.h>
-#include <net/netmap.h>
-#include <net/netmap_user.h>
-
-#include <netinet/in.h> /* sockaddr_in */
-
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
int verbose = 0;
-/* debug support */
-#define ND(format, ...) {}
-#define D(format, ...) do { \
- if (!verbose) break; \
- struct timeval _xxts; \
- gettimeofday(&_xxts, NULL); \
- fprintf(stderr, "%03d.%06d %s [%d] " format "\n", \
- (int)_xxts.tv_sec %1000, (int)_xxts.tv_usec, \
- __FUNCTION__, __LINE__, ##__VA_ARGS__); \
- } while (0)
-
-
-char *version = "$Id: bridge.c 10857 2012-04-06 12:18:22Z luigi $";
+char *version = "$Id: bridge.c 12016 2013-01-23 17:24:22Z luigi $";
static int do_abort = 0;
-/*
- * info on a ring we handle
- */
-struct my_ring {
- const char *ifname;
- int fd;
- char *mem; /* userspace mmap address */
- u_int memsize;
- u_int queueid;
- u_int begin, end; /* first..last+1 rings to check */
- struct netmap_if *nifp;
- struct netmap_ring *tx, *rx; /* shortcuts */
-
- uint32_t if_flags;
- uint32_t if_reqcap;
- uint32_t if_curcap;
-};
-
static void
-sigint_h(__unused int sig)
+sigint_h(int sig)
{
+ (void)sig; /* UNUSED */
do_abort = 1;
signal(SIGINT, SIG_DFL);
}
-static int
-do_ioctl(struct my_ring *me, unsigned long what)
-{
- struct ifreq ifr;
- int error;
-
- bzero(&ifr, sizeof(ifr));
- strncpy(ifr.ifr_name, me->ifname, sizeof(ifr.ifr_name));
- switch (what) {
- case SIOCSIFFLAGS:
- ifr.ifr_flagshigh = me->if_flags >> 16;
- ifr.ifr_flags = me->if_flags & 0xffff;
- break;
- case SIOCSIFCAP:
- ifr.ifr_reqcap = me->if_reqcap;
- ifr.ifr_curcap = me->if_curcap;
- break;
- }
- error = ioctl(me->fd, what, &ifr);
- if (error) {
- D("ioctl error 0x%lx", what);
- return error;
- }
- switch (what) {
- case SIOCGIFFLAGS:
- me->if_flags = (ifr.ifr_flagshigh << 16) |
- (0xffff & ifr.ifr_flags);
- if (verbose)
- D("flags are 0x%x", me->if_flags);
- break;
-
- case SIOCGIFCAP:
- me->if_reqcap = ifr.ifr_reqcap;
- me->if_curcap = ifr.ifr_curcap;
- if (verbose)
- D("curcap are 0x%x", me->if_curcap);
- break;
- }
- return 0;
-}
-
-/*
- * open a device. if me->mem is null then do an mmap.
- */
-static int
-netmap_open(struct my_ring *me, int ringid)
-{
- int fd, err, l;
- struct nmreq req;
-
- me->fd = fd = open("/dev/netmap", O_RDWR);
- if (fd < 0) {
- D("Unable to open /dev/netmap");
- return (-1);
- }
- bzero(&req, sizeof(req));
- strncpy(req.nr_name, me->ifname, sizeof(req.nr_name));
- req.nr_ringid = ringid;
- req.nr_version = NETMAP_API;
- err = ioctl(fd, NIOCGINFO, &req);
- if (err) {
- D("cannot get info on %s", me->ifname);
- goto error;
- }
- me->memsize = l = req.nr_memsize;
- if (verbose)
- D("memsize is %d MB", l>>20);
- err = ioctl(fd, NIOCREGIF, &req);
- if (err) {
- D("Unable to register %s", me->ifname);
- goto error;
- }
-
- if (me->mem == NULL) {
- me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
- if (me->mem == MAP_FAILED) {
- D("Unable to mmap");
- me->mem = NULL;
- goto error;
- }
- }
-
- me->nifp = NETMAP_IF(me->mem, req.nr_offset);
- me->queueid = ringid;
- if (ringid & NETMAP_SW_RING) {
- me->begin = req.nr_rx_rings;
- me->end = me->begin + 1;
- me->tx = NETMAP_TXRING(me->nifp, req.nr_tx_rings);
- me->rx = NETMAP_RXRING(me->nifp, req.nr_rx_rings);
- } else if (ringid & NETMAP_HW_RING) {
- D("XXX check multiple threads");
- me->begin = ringid & NETMAP_RING_MASK;
- me->end = me->begin + 1;
- me->tx = NETMAP_TXRING(me->nifp, me->begin);
- me->rx = NETMAP_RXRING(me->nifp, me->begin);
- } else {
- me->begin = 0;
- me->end = req.nr_rx_rings; // XXX max of the two
- me->tx = NETMAP_TXRING(me->nifp, 0);
- me->rx = NETMAP_RXRING(me->nifp, 0);
- }
- return (0);
-error:
- close(me->fd);
- return -1;
-}
-
-
-static int
-netmap_close(struct my_ring *me)
-{
- D("");
- if (me->mem)
- munmap(me->mem, me->memsize);
- ioctl(me->fd, NIOCUNREGIF, NULL);
- close(me->fd);
- return (0);
-}
-
-
/*
* move up to 'limit' pkts from rxring to txring swapping buffers.
*/
@@ -237,7 +66,7 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
if (rs->len < 14 || rs->len > 2048)
D("wrong len %d rx[%d] -> tx[%d]", rs->len, j, k);
else if (verbose > 1)
- D("send len %d rx[%d] -> tx[%d]", rs->len, j, k);
+ D("%s send len %d rx[%d] -> tx[%d]", msg, rs->len, j, k);
ts->len = rs->len;
/* report the buffer change. */
@@ -251,7 +80,7 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
rxring->cur = j;
txring->cur = k;
if (verbose && m > 0)
- D("sent %d packets to %p", m, txring);
+ D("%s sent %d packets to %p", msg, m, txring);
return (m);
}
@@ -287,7 +116,7 @@ move(struct my_ring *src, struct my_ring *dst, u_int limit)
* how many packets on this set of queues ?
*/
static int
-howmany(struct my_ring *me, int tx)
+pkt_queued(struct my_ring *me, int tx)
{
u_int i, tot = 0;
@@ -337,6 +166,7 @@ main(int argc, char **argv)
while ( (ch = getopt(argc, argv, "b:i:vw:")) != -1) {
switch (ch) {
+ default:
D("bad option %c %s", ch, optarg);
usage();
break;
@@ -361,6 +191,7 @@ main(int argc, char **argv)
}
}
+
argc -= optind;
argv += optind;
@@ -394,44 +225,12 @@ main(int argc, char **argv)
/* two different interfaces. Take all rings on if1 */
i = 0; // all hw rings
}
- if (netmap_open(me, i))
+ if (netmap_open(me, i, 1))
return (1);
me[1].mem = me[0].mem; /* copy the pointer, so only one mmap */
- if (netmap_open(me+1, 0))
+ if (netmap_open(me+1, 0, 1))
return (1);
- /* if bridging two interfaces, set promisc mode */
- if (i != NETMAP_SW_RING) {
- do_ioctl(me, SIOCGIFFLAGS);
- if ((me[0].if_flags & IFF_UP) == 0) {
- D("%s is down, bringing up...", me[0].ifname);
- me[0].if_flags |= IFF_UP;
- }
- me[0].if_flags |= IFF_PPROMISC;
- do_ioctl(me, SIOCSIFFLAGS);
-
- do_ioctl(me+1, SIOCGIFFLAGS);
- me[1].if_flags |= IFF_PPROMISC;
- do_ioctl(me+1, SIOCSIFFLAGS);
-
- /* also disable checksums etc. */
- do_ioctl(me, SIOCGIFCAP);
- me[0].if_reqcap = me[0].if_curcap;
- me[0].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE);
- do_ioctl(me+0, SIOCSIFCAP);
- }
- do_ioctl(me+1, SIOCGIFFLAGS);
- if ((me[1].if_flags & IFF_UP) == 0) {
- D("%s is down, bringing up...", me[1].ifname);
- me[1].if_flags |= IFF_UP;
- }
- do_ioctl(me+1, SIOCSIFFLAGS);
-
- do_ioctl(me+1, SIOCGIFCAP);
- me[1].if_reqcap = me[1].if_curcap;
- me[1].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE);
- do_ioctl(me+1, SIOCSIFCAP);
-
/* setup poll(2) variables. */
memset(pollfd, 0, sizeof(pollfd));
for (i = 0; i < 2; i++) {
@@ -451,8 +250,8 @@ main(int argc, char **argv)
int n0, n1, ret;
pollfd[0].events = pollfd[1].events = 0;
pollfd[0].revents = pollfd[1].revents = 0;
- n0 = howmany(me, 0);
- n1 = howmany(me + 1, 0);
+ n0 = pkt_queued(me, 0);
+ n1 = pkt_queued(me + 1, 0);
if (n0)
pollfd[1].events |= POLLOUT;
else
@@ -468,14 +267,14 @@ main(int argc, char **argv)
ret <= 0 ? "timeout" : "ok",
pollfd[0].events,
pollfd[0].revents,
- howmany(me, 0),
+ pkt_queued(me, 0),
me[0].rx->cur,
- howmany(me, 1),
+ pkt_queued(me, 1),
pollfd[1].events,
pollfd[1].revents,
- howmany(me+1, 0),
+ pkt_queued(me+1, 0),
me[1].rx->cur,
- howmany(me+1, 1)
+ pkt_queued(me+1, 1)
);
if (ret < 0)
continue;
diff --git a/tools/tools/netmap/nm_util.c b/tools/tools/netmap/nm_util.c
new file mode 100644
index 0000000..2b2c0ca
--- /dev/null
+++ b/tools/tools/netmap/nm_util.c
@@ -0,0 +1,251 @@
+/*
+ * Copyright (C) 2012 Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id$
+ *
+ * utilities to use netmap devices.
+ * This does the basic functions of opening a device and issuing
+ * ioctls()
+ */
+
+#include "nm_util.h"
+
+extern int verbose;
+
+int
+nm_do_ioctl(struct my_ring *me, u_long what, int subcmd)
+{
+ struct ifreq ifr;
+ int error;
+#if defined( __FreeBSD__ ) || defined (__APPLE__)
+ int fd = me->fd;
+#endif
+#ifdef linux
+ struct ethtool_value eval;
+ int fd;
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0) {
+ printf("Error: cannot get device control socket.\n");
+ return -1;
+ }
+#endif /* linux */
+
+ (void)subcmd; // unused
+ bzero(&ifr, sizeof(ifr));
+ strncpy(ifr.ifr_name, me->ifname, sizeof(ifr.ifr_name));
+ switch (what) {
+ case SIOCSIFFLAGS:
+#ifndef __APPLE__
+ ifr.ifr_flagshigh = me->if_flags >> 16;
+#endif
+ ifr.ifr_flags = me->if_flags & 0xffff;
+ break;
+
+#if defined( __FreeBSD__ )
+ case SIOCSIFCAP:
+ ifr.ifr_reqcap = me->if_reqcap;
+ ifr.ifr_curcap = me->if_curcap;
+ break;
+#endif
+#ifdef linux
+ case SIOCETHTOOL:
+ eval.cmd = subcmd;
+ eval.data = 0;
+ ifr.ifr_data = (caddr_t)&eval;
+ break;
+#endif /* linux */
+ }
+ error = ioctl(fd, what, &ifr);
+ if (error)
+ goto done;
+ switch (what) {
+ case SIOCGIFFLAGS:
+#ifndef __APPLE__
+ me->if_flags = (ifr.ifr_flagshigh << 16) |
+ (0xffff & ifr.ifr_flags);
+#endif
+ if (verbose)
+ D("flags are 0x%x", me->if_flags);
+ break;
+
+#if defined( __FreeBSD__ )
+ case SIOCGIFCAP:
+ me->if_reqcap = ifr.ifr_reqcap;
+ me->if_curcap = ifr.ifr_curcap;
+ if (verbose)
+ D("curcap are 0x%x", me->if_curcap);
+ break;
+#endif /* __FreeBSD__ */
+ }
+done:
+#ifdef linux
+ close(fd);
+#endif
+ if (error)
+ D("ioctl error %d %lu", error, what);
+ return error;
+}
+
+/*
+ * open a device. if me->mem is null then do an mmap.
+ * Returns 0 on success, -1 on error; the descriptor is stored in me->fd.
+ * If the extra flag 'promisc' is set, promiscuous mode is configured.
+ */
+int
+netmap_open(struct my_ring *me, int ringid, int promisc)
+{
+ int fd, err, l;
+ struct nmreq req;
+
+ me->fd = fd = open("/dev/netmap", O_RDWR);
+ if (fd < 0) {
+ D("Unable to open /dev/netmap");
+ return (-1);
+ }
+ bzero(&req, sizeof(req));
+ req.nr_version = NETMAP_API;
+ strncpy(req.nr_name, me->ifname, sizeof(req.nr_name));
+ req.nr_ringid = ringid;
+ err = ioctl(fd, NIOCGINFO, &req);
+ if (err) {
+ D("cannot get info on %s, errno %d ver %d",
+ me->ifname, errno, req.nr_version);
+ goto error;
+ }
+ me->memsize = l = req.nr_memsize;
+ if (verbose)
+ D("memsize is %d MB", l>>20);
+ err = ioctl(fd, NIOCREGIF, &req);
+ if (err) {
+ D("Unable to register %s", me->ifname);
+ goto error;
+ }
+
+ if (me->mem == NULL) {
+ me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
+ if (me->mem == MAP_FAILED) {
+ D("Unable to mmap");
+ me->mem = NULL;
+ goto error;
+ }
+ }
+
+
+ /* Set the operating mode. */
+ if (ringid != NETMAP_SW_RING) {
+ nm_do_ioctl(me, SIOCGIFFLAGS, 0);
+ if ((me[0].if_flags & IFF_UP) == 0) {
+ D("%s is down, bringing up...", me[0].ifname);
+ me[0].if_flags |= IFF_UP;
+ }
+ if (promisc) {
+ me[0].if_flags |= IFF_PPROMISC;
+ nm_do_ioctl(me, SIOCSIFFLAGS, 0);
+ }
+
+#ifdef __FreeBSD__
+ /* also disable checksums etc. */
+ nm_do_ioctl(me, SIOCGIFCAP, 0);
+ me[0].if_reqcap = me[0].if_curcap;
+ me[0].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE);
+ nm_do_ioctl(me+0, SIOCSIFCAP, 0);
+#endif
+#ifdef linux
+ /* disable:
+ * - generic-segmentation-offload
+ * - tcp-segmentation-offload
+ * - rx-checksumming
+ * - tx-checksumming
+ * XXX check how to set back the caps.
+ */
+ nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_SGSO);
+ nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_STSO);
+ nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_SRXCSUM);
+ nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_STXCSUM);
+#endif /* linux */
+ }
+
+ me->nifp = NETMAP_IF(me->mem, req.nr_offset);
+ me->queueid = ringid;
+ if (ringid & NETMAP_SW_RING) {
+ me->begin = req.nr_rx_rings;
+ me->end = me->begin + 1;
+ me->tx = NETMAP_TXRING(me->nifp, req.nr_tx_rings);
+ me->rx = NETMAP_RXRING(me->nifp, req.nr_rx_rings);
+ } else if (ringid & NETMAP_HW_RING) {
+ D("XXX check multiple threads");
+ me->begin = ringid & NETMAP_RING_MASK;
+ me->end = me->begin + 1;
+ me->tx = NETMAP_TXRING(me->nifp, me->begin);
+ me->rx = NETMAP_RXRING(me->nifp, me->begin);
+ } else {
+ me->begin = 0;
+ me->end = req.nr_rx_rings; // XXX max of the two
+ me->tx = NETMAP_TXRING(me->nifp, 0);
+ me->rx = NETMAP_RXRING(me->nifp, 0);
+ }
+ return (0);
+error:
+ close(me->fd);
+ return -1;
+}
+
+
+int
+netmap_close(struct my_ring *me)
+{
+ D("");
+ if (me->mem)
+ munmap(me->mem, me->memsize);
+ ioctl(me->fd, NIOCUNREGIF, NULL);
+ close(me->fd);
+ return (0);
+}
+
+
+/*
+ * how many packets on this set of queues ?
+ */
+int
+pkt_queued(struct my_ring *me, int tx)
+{
+ u_int i, tot = 0;
+
+ ND("me %p begin %d end %d", me, me->begin, me->end);
+ for (i = me->begin; i < me->end; i++) {
+ struct netmap_ring *ring = tx ?
+ NETMAP_TXRING(me->nifp, i) : NETMAP_RXRING(me->nifp, i);
+ tot += ring->avail;
+ }
+ if (0 && verbose && tot && !tx)
+ D("ring %s %s %s has %d avail at %d",
+ me->ifname, tx ? "tx": "rx",
+ me->end >= me->nifp->ni_tx_rings ? // XXX who comes first ?
+ "host":"net",
+ tot, NETMAP_TXRING(me->nifp, me->begin)->cur);
+ return tot;
+}
diff --git a/tools/tools/netmap/nm_util.h b/tools/tools/netmap/nm_util.h
new file mode 100644
index 0000000..0d64f13
--- /dev/null
+++ b/tools/tools/netmap/nm_util.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2012 Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id$
+ *
+ * Some utilities to build netmap-based programs.
+ */
+
+#ifndef _NM_UTIL_H
+#define _NM_UTIL_H
+#include <errno.h>
+#include <signal.h> /* signal */
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h> /* PRI* macros */
+#include <string.h> /* strcmp */
+#include <fcntl.h> /* open */
+#include <unistd.h> /* close */
+#include <ifaddrs.h> /* getifaddrs */
+
+#include <sys/mman.h> /* PROT_* */
+#include <sys/ioctl.h> /* ioctl */
+#include <sys/poll.h>
+#include <sys/socket.h> /* sockaddr.. */
+#include <arpa/inet.h> /* ntohs */
+#include <sys/param.h>
+#include <sys/sysctl.h> /* sysctl */
+#include <sys/time.h> /* timersub */
+
+#include <net/ethernet.h>
+#include <net/if.h> /* ifreq */
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+
+#include <net/netmap.h>
+#include <net/netmap_user.h>
+
+#ifndef MY_PCAP /* use the system's pcap if available */
+
+#ifdef NO_PCAP
+#define PCAP_ERRBUF_SIZE 512
+typedef void pcap_t;
+struct pcap_pkthdr;
+#define pcap_inject(a,b,c) ((void)a, (void)b, (void)c, -1)
+#define pcap_dispatch(a, b, c, d) (void)c
+#define pcap_open_live(a, b, c, d, e) ((void)e, NULL)
+#else /* !NO_PCAP */
+#include <pcap/pcap.h> // XXX do we need it ?
+#endif /* !NO_PCAP */
+
+#endif // XXX hack
+
+#include <pthread.h> /* pthread_* */
+
+#ifdef linux
+#define ifr_flagshigh ifr_flags
+#define ifr_curcap ifr_flags
+#define ifr_reqcap ifr_flags
+#define IFF_PPROMISC IFF_PROMISC
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+
+#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
+#include <netinet/ether.h> /* ether_aton */
+#include <linux/if_packet.h> /* sockaddr_ll */
+#endif /* linux */
+
+#ifdef __FreeBSD__
+#include <sys/endian.h> /* le64toh */
+#include <machine/param.h>
+
+#include <pthread_np.h> /* pthread w/ affinity */
+#include <sys/cpuset.h> /* cpu_set */
+#include <net/if_dl.h> /* LLADDR */
+#endif /* __FreeBSD__ */
+
+#ifdef __APPLE__
+#define ifr_flagshigh ifr_flags // XXX
+#define IFF_PPROMISC IFF_PROMISC
+#include <net/if_dl.h> /* LLADDR */
+#define clock_gettime(a,b) \
+ do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
+#endif /* __APPLE__ */
+
+static inline int min(int a, int b) { return a < b ? a : b; }
+extern int time_second;
+
+/* debug support */
+#define ND(format, ...) do {} while(0)
+#define D(format, ...) \
+ fprintf(stderr, "%s [%d] " format "\n", \
+ __FUNCTION__, __LINE__, ##__VA_ARGS__)
+
+#define RD(lps, format, ...) \
+ do { \
+ static int t0, cnt; \
+ if (t0 != time_second) { \
+ t0 = time_second; \
+ cnt = 0; \
+ } \
+ if (cnt++ < lps) \
+ D(format, ##__VA_ARGS__); \
+ } while (0)
+
+
+
+// XXX does it work on 32-bit machines ?
+static inline void prefetch (const void *x)
+{
+ __asm volatile("prefetcht0 %0" :: "m" (*(const unsigned long *)x));
+}
+
+// XXX only for multiples of 64 bytes, non overlapped.
+static inline void
+pkt_copy(const void *_src, void *_dst, int l)
+{
+ const uint64_t *src = _src;
+ uint64_t *dst = _dst;
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+ if (unlikely(l >= 1024)) {
+ bcopy(src, dst, l);
+ return;
+ }
+ for (; l > 0; l-=64) {
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ }
+}
+
+/*
+ * info on a ring we handle
+ */
+struct my_ring {
+ const char *ifname;
+ int fd;
+ char *mem; /* userspace mmap address */
+ u_int memsize;
+ u_int queueid;
+ u_int begin, end; /* first..last+1 rings to check */
+ struct netmap_if *nifp;
+ struct netmap_ring *tx, *rx; /* shortcuts */
+
+ uint32_t if_flags;
+ uint32_t if_reqcap;
+ uint32_t if_curcap;
+};
+int netmap_open(struct my_ring *me, int ringid, int promisc);
+int netmap_close(struct my_ring *me);
+int nm_do_ioctl(struct my_ring *me, u_long what, int subcmd);
+#endif /* _NM_UTIL_H */
diff --git a/tools/tools/netmap/pcap.c b/tools/tools/netmap/pcap.c
index 2125176..c2acd1a 100644
--- a/tools/tools/netmap/pcap.c
+++ b/tools/tools/netmap/pcap.c
@@ -1,5 +1,5 @@
/*
- * (C) 2011 Luigi Rizzo
+ * (C) 2011-2012 Luigi Rizzo
*
* BSD license
*
@@ -10,81 +10,18 @@
* $FreeBSD$
*/
-#include <errno.h>
-#include <signal.h> /* signal */
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h> /* strcmp */
-#include <fcntl.h> /* open */
-#include <unistd.h> /* close */
+#define MY_PCAP
+#include "nm_util.h"
-#include <sys/endian.h> /* le64toh */
-#include <sys/mman.h> /* PROT_* */
-#include <sys/ioctl.h> /* ioctl */
-#include <machine/param.h>
-#include <sys/poll.h>
-#include <sys/socket.h> /* sockaddr.. */
-#include <arpa/inet.h> /* ntohs */
-
-#include <net/if.h> /* ifreq */
-#include <net/ethernet.h>
-#include <net/netmap.h>
-#include <net/netmap_user.h>
-
-#include <netinet/in.h> /* sockaddr_in */
-
-#include <sys/socket.h>
-#include <ifaddrs.h>
-
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-
-const char *version = "$Id$";
+char *version = "$Id: pcap.c 11463 2012-07-30 15:26:02Z luigi $";
int verbose = 0;
-/* debug support */
-#define ND(format, ...) do {} while (0)
-#define D(format, ...) do { \
- if (verbose) \
- fprintf(stderr, "--- %s [%d] " format "\n", \
- __FUNCTION__, __LINE__, ##__VA_ARGS__); \
- } while (0)
-
-static inline void prefetch (const void *x)
-{
- __asm volatile("prefetcht0 %0" :: "m" (*(const unsigned long *)x));
-}
-
-// XXX only for multiples of 64 bytes, non overlapped.
-static inline void
-pkt_copy(const void *_src, void *_dst, int l)
-{
- const uint64_t *src = _src;
- uint64_t *dst = _dst;
-#define likely(x) __builtin_expect(!!(x), 1)
-#define unlikely(x) __builtin_expect(!!(x), 0)
- if (unlikely(l >= 1024)) {
- bcopy(src, dst, l);
- return;
- }
- for (; l > 0; l-=64) {
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- }
-}
-
/*
* We redefine here a number of structures that are in pcap.h
* so we can compile this file without the system header.
*/
#ifndef PCAP_ERRBUF_SIZE
#define PCAP_ERRBUF_SIZE 128
-
/*
* Each packet is accompanied by a header including the timestamp,
* captured size and actual size.
@@ -135,12 +72,13 @@ typedef enum {
PCAP_D_OUT
} pcap_direction_t;
-struct bpf_program;
typedef void (*pcap_handler)(u_char *user,
const struct pcap_pkthdr *h, const u_char *bytes);
+char errbuf[PCAP_ERRBUF_SIZE];
+
pcap_t *pcap_open_live(const char *device, int snaplen,
int promisc, int to_ms, char *errbuf);
@@ -154,24 +92,6 @@ char *pcap_lookupdev(char *errbuf);
int pcap_inject(pcap_t *p, const void *buf, size_t size);
int pcap_fileno(pcap_t *p);
const char *pcap_lib_version(void);
-void pcap_freealldevs(pcap_if_t *);
-pcap_t *pcap_create(const char *, char *);
-int pcap_activate(pcap_t *);
-int pcap_can_set_rfmon(pcap_t *);
-int pcap_set_snaplen(pcap_t *, int);
-int pcap_snapshot(pcap_t *);
-int pcap_lookupnet(const char *, uint32_t *, uint32_t *, char *);
-int pcap_set_promisc(pcap_t *, int);
-int pcap_set_timeout(pcap_t *, int);
-int pcap_compile(pcap_t *, struct bpf_program *, const char *, int,
- uint32_t);
-int pcap_setfilter(pcap_t *, struct bpf_program *);
-int pcap_datalink(pcap_t *);
-const char *pcap_datalink_val_to_name(int);
-const char *pcap_datalink_val_to_description(int);
-int pcap_stats(pcap_t *, struct pcap_stat *);
-int pcap_loop(pcap_t *, int, pcap_handler, u_char *);
-char *pcap_geterr(pcap_t *);
struct eproto {
@@ -180,7 +100,7 @@ struct eproto {
};
#endif /* !PCAP_ERRBUF_SIZE */
-#ifdef __PIC__
+#ifndef TEST
/*
* build as a shared library
*/
@@ -190,8 +110,12 @@ char pcap_version[] = "libnetmap version 0.3";
/*
* Our equivalent of pcap_t
*/
-struct my_ring {
- struct nmreq nmr;
+struct pcap_ring {
+ struct my_ring me;
+#if 0
+ const char *ifname;
+
+ //struct nmreq nmr;
int fd;
char *mem; /* userspace mmap address */
@@ -200,6 +124,10 @@ struct my_ring {
u_int begin, end; /* first..last+1 rings to check */
struct netmap_if *nifp;
+ uint32_t if_flags;
+ uint32_t if_reqcap;
+ uint32_t if_curcap;
+#endif
int snaplen;
char *errbuf;
int promisc;
@@ -207,9 +135,6 @@ struct my_ring {
struct pcap_pkthdr hdr;
- uint32_t if_flags;
- uint32_t if_reqcap;
- uint32_t if_curcap;
struct pcap_stat st;
@@ -217,114 +142,6 @@ struct my_ring {
};
-static int
-do_ioctl(struct my_ring *me, unsigned long what)
-{
- struct ifreq ifr;
- int error;
-
- bzero(&ifr, sizeof(ifr));
- strncpy(ifr.ifr_name, me->nmr.nr_name, sizeof(ifr.ifr_name));
- switch (what) {
- case SIOCSIFFLAGS:
- D("call SIOCSIFFLAGS 0x%x", me->if_flags);
- ifr.ifr_flagshigh = (me->if_flags >> 16) & 0xffff;
- ifr.ifr_flags = me->if_flags & 0xffff;
- break;
- case SIOCSIFCAP:
- ifr.ifr_reqcap = me->if_reqcap;
- ifr.ifr_curcap = me->if_curcap;
- break;
- }
- error = ioctl(me->fd, what, &ifr);
- if (error) {
- D("ioctl 0x%lx error %d", what, error);
- return error;
- }
- switch (what) {
- case SIOCSIFFLAGS:
- case SIOCGIFFLAGS:
- me->if_flags = (ifr.ifr_flagshigh << 16) |
- (0xffff & ifr.ifr_flags);
- D("flags are L 0x%x H 0x%x 0x%x",
- (uint16_t)ifr.ifr_flags,
- (uint16_t)ifr.ifr_flagshigh, me->if_flags);
- break;
-
- case SIOCGIFCAP:
- me->if_reqcap = ifr.ifr_reqcap;
- me->if_curcap = ifr.ifr_curcap;
- D("curcap are 0x%x", me->if_curcap);
- break;
- }
- return 0;
-}
-
-
-/*
- * open a device. if me->mem is null then do an mmap.
- */
-static int
-netmap_open(struct my_ring *me, int ringid)
-{
- int fd, err, l;
- u_int i;
- struct nmreq req;
-
- me->fd = fd = open("/dev/netmap", O_RDWR);
- if (fd < 0) {
- D("Unable to open /dev/netmap");
- return (-1);
- }
- bzero(&req, sizeof(req));
- strncpy(req.nr_name, me->nmr.nr_name, sizeof(req.nr_name));
- req.nr_ringid = ringid;
- req.nr_version = NETMAP_API;
- err = ioctl(fd, NIOCGINFO, &req);
- if (err) {
- D("cannot get info on %s", me->nmr.nr_name);
- goto error;
- }
- me->memsize = l = req.nr_memsize;
- ND("memsize is %d MB", l>>20);
- err = ioctl(fd, NIOCREGIF, &req);
- if (err) {
- D("Unable to register %s", me->nmr.nr_name);
- goto error;
- }
-
- if (me->mem == NULL) {
- me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
- if (me->mem == MAP_FAILED) {
- D("Unable to mmap");
- me->mem = NULL;
- goto error;
- }
- }
-
- me->nifp = NETMAP_IF(me->mem, req.nr_offset);
- me->queueid = ringid;
- if (ringid & NETMAP_SW_RING) {
- me->begin = req.nr_rx_rings;
- me->end = me->begin + 1;
- } else if (ringid & NETMAP_HW_RING) {
- me->begin = ringid & NETMAP_RING_MASK;
- me->end = me->begin + 1;
- } else {
- me->begin = 0;
- me->end = req.nr_rx_rings;
- }
- /* request timestamps for packets */
- for (i = me->begin; i < me->end; i++) {
- struct netmap_ring *ring = NETMAP_RXRING(me->nifp, i);
- ring->flags = NR_TIMESTAMP;
- }
- //me->tx = NETMAP_TXRING(me->nifp, 0);
- return (0);
-error:
- close(me->fd);
- return -1;
-}
/*
* There is a set of functions that tcpdump expects even if probably
@@ -343,10 +160,12 @@ const char *pcap_lib_version(void)
}
int
-pcap_findalldevs(pcap_if_t **alldevsp, __unused char *errbuf)
+pcap_findalldevs(pcap_if_t **alldevsp, char *errbuf)
{
+ pcap_if_t *top = NULL;
+#ifndef linux
struct ifaddrs *i_head, *i;
- pcap_if_t *top = NULL, *cur;
+ pcap_if_t *cur;
struct pcap_addr *tail = NULL;
int l;
@@ -397,7 +216,7 @@ pcap_findalldevs(pcap_if_t **alldevsp, __unused char *errbuf)
}
#define SA_NEXT(x) ((struct sockaddr *)((char *)(x) + (x)->sa_len))
pca->addr = (struct sockaddr *)(pca + 1);
- bcopy(i->ifa_addr, pca->addr, i->ifa_addr->sa_len);
+ pkt_copy(i->ifa_addr, pca->addr, i->ifa_addr->sa_len);
if (i->ifa_netmask) {
pca->netmask = SA_NEXT(pca->addr);
bcopy(i->ifa_netmask, pca->netmask, i->ifa_netmask->sa_len);
@@ -415,12 +234,15 @@ pcap_findalldevs(pcap_if_t **alldevsp, __unused char *errbuf)
}
freeifaddrs(i_head);
+#endif /* !linux */
+ (void)errbuf; /* UNUSED */
*alldevsp = top;
return 0;
}
-void pcap_freealldevs(__unused pcap_if_t *alldevs)
+void pcap_freealldevs(pcap_if_t *alldevs)
{
+ (void)alldevs; /* UNUSED */
D("unimplemented");
}
@@ -447,8 +269,9 @@ pcap_activate(pcap_t *p)
}
int
-pcap_can_set_rfmon(__unused pcap_t *p)
+pcap_can_set_rfmon(pcap_t *p)
{
+ (void)p; /* UNUSED */
D("");
return 0; /* no we can't */
}
@@ -456,7 +279,7 @@ pcap_can_set_rfmon(__unused pcap_t *p)
int
pcap_set_snaplen(pcap_t *p, int snaplen)
{
- struct my_ring *me = p;
+ struct pcap_ring *me = p;
D("len %d", snaplen);
me->snaplen = snaplen;
@@ -466,7 +289,7 @@ pcap_set_snaplen(pcap_t *p, int snaplen)
int
pcap_snapshot(pcap_t *p)
{
- struct my_ring *me = p;
+ struct pcap_ring *me = p;
D("len %d", me->snaplen);
return me->snaplen;
@@ -474,9 +297,10 @@ pcap_snapshot(pcap_t *p)
int
pcap_lookupnet(const char *device, uint32_t *netp,
- uint32_t *maskp, __unused char *errbuf)
+ uint32_t *maskp, char *errbuf)
{
+ (void)errbuf; /* UNUSED */
D("device %s", device);
inet_aton("10.0.0.255", (struct in_addr *)netp);
inet_aton("255.255.255.0",(struct in_addr *) maskp);
@@ -486,17 +310,17 @@ pcap_lookupnet(const char *device, uint32_t *netp,
int
pcap_set_promisc(pcap_t *p, int promisc)
{
- struct my_ring *me = p;
+ struct pcap_ring *me = p;
D("promisc %d", promisc);
- if (do_ioctl(me, SIOCGIFFLAGS))
+ if (nm_do_ioctl(&me->me, SIOCGIFFLAGS, 0))
D("SIOCGIFFLAGS failed");
if (promisc) {
- me->if_flags |= IFF_PPROMISC;
+ me->me.if_flags |= IFF_PPROMISC;
} else {
- me->if_flags &= ~IFF_PPROMISC;
+ me->me.if_flags &= ~IFF_PPROMISC;
}
- if (do_ioctl(me, SIOCSIFFLAGS))
+ if (nm_do_ioctl(&me->me, SIOCSIFFLAGS, 0))
D("SIOCSIFFLAGS failed");
return 0;
}
@@ -504,7 +328,7 @@ pcap_set_promisc(pcap_t *p, int promisc)
int
pcap_set_timeout(pcap_t *p, int to_ms)
{
- struct my_ring *me = p;
+ struct pcap_ring *me = p;
D("%d ms", to_ms);
me->to_ms = to_ms;
@@ -514,23 +338,30 @@ pcap_set_timeout(pcap_t *p, int to_ms)
struct bpf_program;
int
-pcap_compile(__unused pcap_t *p, __unused struct bpf_program *fp,
- const char *str, __unused int optimize, __unused uint32_t netmask)
+pcap_compile(pcap_t *p, struct bpf_program *fp,
+ const char *str, int optimize, uint32_t netmask)
{
+ (void)p; /* UNUSED */
+ (void)fp; /* UNUSED */
+ (void)optimize; /* UNUSED */
+ (void)netmask; /* UNUSED */
D("%s", str);
return 0;
}
int
-pcap_setfilter(__unused pcap_t *p, __unused struct bpf_program *fp)
+pcap_setfilter(pcap_t *p, struct bpf_program *fp)
{
+ (void)p; /* UNUSED */
+ (void)fp; /* UNUSED */
D("");
return 0;
}
int
-pcap_datalink(__unused pcap_t *p)
+pcap_datalink(pcap_t *p)
{
+ (void)p; /* UNUSED */
D("returns 1");
return 1; // ethernet
}
@@ -553,7 +384,7 @@ struct pcap_stat;
int
pcap_stats(pcap_t *p, struct pcap_stat *ps)
{
- struct my_ring *me = p;
+ struct pcap_ring *me = p;
ND("");
*ps = me->st;
@@ -563,44 +394,42 @@ pcap_stats(pcap_t *p, struct pcap_stat *ps)
char *
pcap_geterr(pcap_t *p)
{
- struct my_ring *me = p;
+ struct pcap_ring *me = p;
D("");
return me->msg;
}
pcap_t *
-pcap_open_live(const char *device, __unused int snaplen,
- int promisc, int to_ms, __unused char *errbuf)
+pcap_open_live(const char *device, int snaplen,
+ int promisc, int to_ms, char *errbuf)
{
- struct my_ring *me;
+ struct pcap_ring *me;
+ int l;
+ (void)snaplen; /* UNUSED */
+ (void)errbuf; /* UNUSED */
+ if (!device) {
+ D("missing device name");
+ return NULL;
+ }
+
+ l = strlen(device) + 1;
D("request to open %s snaplen %d promisc %d timeout %dms",
device, snaplen, promisc, to_ms);
- me = calloc(1, sizeof(*me));
+ me = calloc(1, sizeof(*me) + l);
if (me == NULL) {
D("failed to allocate struct for %s", device);
return NULL;
}
- strncpy(me->nmr.nr_name, device, sizeof(me->nmr.nr_name));
- if (netmap_open(me, 0)) {
+ me->me.ifname = (char *)(me + 1);
+ strcpy((char *)me->me.ifname, device);
+ if (netmap_open(&me->me, 0, promisc)) {
D("error opening %s", device);
free(me);
return NULL;
}
me->to_ms = to_ms;
- if (do_ioctl(me, SIOCGIFFLAGS))
- D("SIOCGIFFLAGS failed");
- if (promisc) {
- me->if_flags |= IFF_PPROMISC;
- if (do_ioctl(me, SIOCSIFFLAGS))
- D("SIOCSIFFLAGS failed");
- }
- if (do_ioctl(me, SIOCGIFCAP))
- D("SIOCGIFCAP failed");
- me->if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE);
- if (do_ioctl(me, SIOCSIFCAP))
- D("SIOCSIFCAP failed");
return (pcap_t *)me;
}
@@ -640,15 +469,19 @@ pcap_get_selectable_fd(pcap_t *p)
}
int
-pcap_setnonblock(__unused pcap_t *p, int nonblock, __unused char *errbuf)
+pcap_setnonblock(pcap_t *p, int nonblock, char *errbuf)
{
+ (void)p; /* UNUSED */
+ (void)errbuf; /* UNUSED */
D("mode is %d", nonblock);
return 0; /* ignore */
}
int
-pcap_setdirection(__unused pcap_t *p, __unused pcap_direction_t d)
+pcap_setdirection(pcap_t *p, pcap_direction_t d)
{
+ (void)p; /* UNUSED */
+ (void)d; /* UNUSED */
D("");
return 0; /* ignore */
};
@@ -656,7 +489,8 @@ pcap_setdirection(__unused pcap_t *p, __unused pcap_direction_t d)
int
pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
{
- struct my_ring *me = p;
+ struct pcap_ring *pme = p;
+ struct my_ring *me = &pme->me;
int got = 0;
u_int si;
@@ -669,7 +503,7 @@ pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
ND("ring has %d pkts", ring->avail);
if (ring->avail == 0)
continue;
- me->hdr.ts = ring->ts;
+ pme->hdr.ts = ring->ts;
/*
* XXX a proper prefetch should be done as
* prefetch(i); callback(i-1); ...
@@ -684,15 +518,15 @@ pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
}
u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
prefetch(buf);
- me->hdr.len = me->hdr.caplen = ring->slot[i].len;
+ pme->hdr.len = pme->hdr.caplen = ring->slot[i].len;
// D("call %p len %d", p, me->hdr.len);
- callback(user, &me->hdr, buf);
+ callback(user, &pme->hdr, buf);
ring->cur = NETMAP_RING_NEXT(ring, i);
ring->avail--;
got++;
}
}
- me->st.ps_recv += got;
+ pme->st.ps_recv += got;
return got;
}
@@ -732,13 +566,13 @@ pcap_inject(pcap_t *p, const void *buf, size_t size)
int
pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
{
- struct my_ring *me = p;
+ struct pcap_ring *me = p;
struct pollfd fds[1];
int i;
ND("cnt %d", cnt);
memset(fds, 0, sizeof(fds));
- fds[0].fd = me->fd;
+ fds[0].fd = me->me.fd;
fds[0].events = (POLLIN);
while (cnt == -1 || cnt > 0) {
@@ -753,11 +587,10 @@ pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
return 0;
}
-#endif /* __PIC__ */
+#endif /* !TEST */
-#ifndef __PIC__
-static void
-do_send(u_char *user, const struct pcap_pkthdr *h, const u_char *buf)
+#ifdef TEST /* build test code */
+void do_send(u_char *user, const struct pcap_pkthdr *h, const u_char *buf)
{
pcap_inject((pcap_t *)user, buf, h->caplen);
}
@@ -819,4 +652,4 @@ main(int argc, char **argv)
return (0);
}
-#endif /* !__PIC__ */
+#endif /* TEST */
diff --git a/tools/tools/netmap/pkt-gen.c b/tools/tools/netmap/pkt-gen.c
index d0e6409..7c2ad98 100644
--- a/tools/tools/netmap/pkt-gen.c
+++ b/tools/tools/netmap/pkt-gen.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -25,7 +25,7 @@
/*
* $FreeBSD$
- * $Id: pkt-gen.c 10967 2012-05-03 11:29:23Z luigi $
+ * $Id: pkt-gen.c 12024 2013-01-25 05:41:51Z luigi $
*
* Example program to show how to build a multithreaded packet
* source/sink using the netmap device.
@@ -36,120 +36,17 @@
*
*/
-const char *default_payload="netmap pkt-gen Luigi Rizzo and Matteo Landi\n"
+#include "nm_util.h"
+
+const char *default_payload="netmap pkt-gen payload\n"
"http://info.iet.unipi.it/~luigi/netmap/ ";
-#include <errno.h>
-#include <pthread.h> /* pthread_* */
-#include <pthread_np.h> /* pthread w/ affinity */
-#include <signal.h> /* signal */
-#include <stdlib.h>
-#include <stdio.h>
-#include <inttypes.h> /* PRI* macros */
-#include <string.h> /* strcmp */
-#include <fcntl.h> /* open */
-#include <unistd.h> /* close */
-#include <ifaddrs.h> /* getifaddrs */
-
-#include <sys/mman.h> /* PROT_* */
-#include <sys/ioctl.h> /* ioctl */
-#include <sys/poll.h>
-#include <sys/socket.h> /* sockaddr.. */
-#include <arpa/inet.h> /* ntohs */
-#include <sys/param.h>
-#include <sys/cpuset.h> /* cpu_set */
-#include <sys/sysctl.h> /* sysctl */
-#include <sys/time.h> /* timersub */
-
-#include <net/ethernet.h>
-#include <net/if.h> /* ifreq */
-#include <net/if_dl.h> /* LLADDR */
-
-#include <netinet/in.h>
-#include <netinet/ip.h>
-#include <netinet/udp.h>
-
-#include <net/netmap.h>
-#include <net/netmap_user.h>
-#include <pcap/pcap.h>
-
-
-static inline int min(int a, int b) { return a < b ? a : b; }
-
-/* debug support */
-#define D(format, ...) \
- fprintf(stderr, "%s [%d] " format "\n", \
- __FUNCTION__, __LINE__, ##__VA_ARGS__)
-
-#ifndef EXPERIMENTAL
-#define EXPERIMENTAL 0
-#endif
+int time_second; // support for RD() debugging macro
int verbose = 0;
-#define MAX_QUEUES 64 /* no need to limit */
#define SKIP_PAYLOAD 1 /* do not check payload. */
-inline void prefetch (const void *x)
-{
- __asm volatile("prefetcht0 %0" :: "m" (*(const unsigned long *)x));
-}
-
-// XXX only for multiples of 64 bytes, non overlapped.
-static inline void
-pkt_copy(void *_src, void *_dst, int l)
-{
- uint64_t *src = _src;
- uint64_t *dst = _dst;
-#define likely(x) __builtin_expect(!!(x), 1)
-#define unlikely(x) __builtin_expect(!!(x), 0)
- if (unlikely(l >= 1024)) {
- bcopy(src, dst, l);
- return;
- }
- for (; l > 0; l-=64) {
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- }
-}
-
-
-#if EXPERIMENTAL
-/* Wrapper around `rdtsc' to take reliable timestamps flushing the pipeline */
-#define netmap_rdtsc(t) \
- do { \
- u_int __regs[4]; \
- \
- do_cpuid(0, __regs); \
- (t) = rdtsc(); \
- } while (0)
-
-static __inline void
-do_cpuid(u_int ax, u_int *p)
-{
- __asm __volatile("cpuid"
- : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
- : "0" (ax));
-}
-
-static __inline uint64_t
-rdtsc(void)
-{
- uint64_t rv;
-
- __asm __volatile("rdtsc" : "=A" (rv));
- return (rv);
-}
-#define MAX_SAMPLES 100000
-#endif /* EXPERIMENTAL */
-
-
struct pkt {
struct ether_header eh;
struct ip ip;
@@ -157,16 +54,29 @@ struct pkt {
uint8_t body[2048]; // XXX hardwired
} __attribute__((__packed__));
+struct ip_range {
+ char *name;
+ struct in_addr start, end, cur;
+ uint16_t port0, port1, cur_p;
+};
+
+struct mac_range {
+ char *name;
+ struct ether_addr start, end;
+};
+
/*
* global arguments for all threads
*/
+
struct glob_arg {
- const char *src_ip;
- const char *dst_ip;
- const char *src_mac;
- const char *dst_mac;
+ struct ip_range src_ip;
+ struct ip_range dst_ip;
+ struct mac_range dst_mac;
+ struct mac_range src_mac;
int pkt_size;
int burst;
+ int forever;
int npackets; /* total packets to send */
int nthreads;
int cpus;
@@ -175,13 +85,20 @@ struct glob_arg {
#define OPT_ACCESS 2
#define OPT_COPY 4
#define OPT_MEMCPY 8
- int use_pcap;
+#define OPT_TS 16 /* add a timestamp */
+ int dev_type;
pcap_t *p;
-};
-struct mystat {
- uint64_t containers[8];
+ int affinity;
+ int main_fd;
+ int report_interval;
+ void *(*td_body)(void *);
+ void *mmap_addr;
+ int mmap_size;
+ char *ifname;
};
+enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };
+
/*
* Arguments for a new thread. The same structure is used by
@@ -196,43 +113,106 @@ struct targ {
struct nmreq nmr;
struct netmap_if *nifp;
uint16_t qfirst, qlast; /* range of queues to scan */
- uint64_t count;
+ volatile uint64_t count;
struct timeval tic, toc;
int me;
pthread_t thread;
int affinity;
- uint8_t dst_mac[6];
- uint8_t src_mac[6];
- u_int dst_mac_range;
- u_int src_mac_range;
- uint32_t dst_ip;
- uint32_t src_ip;
- u_int dst_ip_range;
- u_int src_ip_range;
-
struct pkt pkt;
};
+/*
+ * extract the extremes from a range of ipv4 addresses.
+ * addr_lo[-addr_hi][:port_lo[-port_hi]]
+ */
+static void
+extract_ip_range(struct ip_range *r)
+{
+ char *p_lo, *p_hi;
+ char buf1[16]; // one ip address
+
+ D("extract IP range from %s", r->name);
+ p_lo = index(r->name, ':'); /* do we have ports ? */
+ if (p_lo) {
+ D(" found ports at %s", p_lo);
+ *p_lo++ = '\0';
+ p_hi = index(p_lo, '-');
+ if (p_hi)
+ *p_hi++ = '\0';
+ else
+ p_hi = p_lo;
+ r->port0 = strtol(p_lo, NULL, 0);
+ r->port1 = strtol(p_hi, NULL, 0);
+ if (r->port1 < r->port0) {
+ r->cur_p = r->port0;
+ r->port0 = r->port1;
+ r->port1 = r->cur_p;
+ }
+ r->cur_p = r->port0;
+ D("ports are %d to %d", r->port0, r->port1);
+ }
+ p_hi = index(r->name, '-'); /* do we have upper ip ? */
+ if (p_hi) {
+ *p_hi++ = '\0';
+ } else
+ p_hi = r->name;
+ inet_aton(r->name, &r->start);
+ inet_aton(p_hi, &r->end);
+ if (r->start.s_addr > r->end.s_addr) {
+ r->cur = r->start;
+ r->start = r->end;
+ r->end = r->cur;
+ }
+ r->cur = r->start;
+ strncpy(buf1, inet_ntoa(r->end), sizeof(buf1));
+ D("range is %s %d to %s %d", inet_ntoa(r->start), r->port0,
+ buf1, r->port1);
+}
+
+static void
+extract_mac_range(struct mac_range *r)
+{
+ D("extract MAC range from %s", r->name);
+ bcopy(ether_aton(r->name), &r->start, 6);
+ bcopy(ether_aton(r->name), &r->end, 6);
+#if 0
+ bcopy(targ->src_mac, eh->ether_shost, 6);
+ p = index(targ->g->src_mac, '-');
+ if (p)
+ targ->src_mac_range = atoi(p+1);
+
+ bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6);
+ bcopy(targ->dst_mac, eh->ether_dhost, 6);
+ p = index(targ->g->dst_mac, '-');
+ if (p)
+ targ->dst_mac_range = atoi(p+1);
+#endif
+ D("%s starts at %s", r->name, ether_ntoa(&r->start));
+}
+
static struct targ *targs;
static int global_nthreads;
/* control-C handler */
static void
-sigint_h(__unused int sig)
+sigint_h(int sig)
{
- for (int i = 0; i < global_nthreads; i++)
- targs[i].cancel = 1;
+ int i;
+ (void)sig; /* UNUSED */
+ for (i = 0; i < global_nthreads; i++) {
+ targs[i].cancel = 1;
+ }
signal(SIGINT, SIG_DFL);
}
-
/* sysctl wrapper to return the number of active CPUs */
static int
system_ncpus(void)
{
+#ifdef __FreeBSD__
int mib[2], ncpus;
size_t len;
@@ -242,8 +222,32 @@ system_ncpus(void)
sysctl(mib, 2, &ncpus, &len, NULL, 0);
return (ncpus);
+#else
+ return 1;
+#endif /* !__FreeBSD__ */
}
+#ifdef __linux__
+#define sockaddr_dl sockaddr_ll
+#define sdl_family sll_family
+#define AF_LINK AF_PACKET
+#define LLADDR(s) s->sll_addr;
+#include <linux/if_tun.h>
+#define TAP_CLONEDEV "/dev/net/tun"
+#endif /* __linux__ */
+
+#ifdef __FreeBSD__
+#include <net/if_tun.h>
+#define TAP_CLONEDEV "/dev/tap"
+#endif /* __FreeBSD__ */
+
+#ifdef __APPLE__
+// #warning TAP not supported on apple ?
+#include <net/if_utun.h>
+#define TAP_CLONEDEV "/dev/tap"
+#endif /* __APPLE__ */
+
+
/*
* locate the src mac address for our interface, put it
* into the user-supplied buffer. return 0 if ok, -1 on error.
@@ -285,6 +289,7 @@ source_hwaddr(const char *ifname, char *buf)
static int
setaffinity(pthread_t me, int i)
{
+#ifdef __FreeBSD__
cpuset_t cpumask;
if (i == -1)
@@ -298,36 +303,57 @@ setaffinity(pthread_t me, int i)
D("Unable to set affinity");
return 1;
}
+#else
+ (void)me; /* suppress 'unused' warnings */
+ (void)i;
+#endif /* __FreeBSD__ */
return 0;
}
/* Compute the checksum of the given ip header. */
static uint16_t
-checksum(const void *data, uint16_t len)
+checksum(const void *data, uint16_t len, uint32_t sum)
{
const uint8_t *addr = data;
- uint32_t sum = 0;
+ uint32_t i;
- while (len > 1) {
- sum += addr[0] * 256 + addr[1];
- addr += 2;
- len -= 2;
+ /* Checksum all the pairs of bytes first... */
+ for (i = 0; i < (len & ~1U); i += 2) {
+ sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i)));
+ if (sum > 0xFFFF)
+ sum -= 0xFFFF;
}
+ /*
+ * If there's a single byte left over, checksum it, too.
+ * Network byte order is big-endian, so the remaining byte is
+ * the high byte.
+ */
+ if (i < len) {
+ sum += addr[i] << 8;
+ if (sum > 0xFFFF)
+ sum -= 0xFFFF;
+ }
+ return sum;
+}
- if (len == 1)
- sum += *addr * 256;
-
- sum = (sum >> 16) + (sum & 0xffff);
- sum += (sum >> 16);
-
- sum = htons(sum);
-
- return ~sum;
+static u_int16_t
+wrapsum(u_int32_t sum)
+{
+ sum = ~sum & 0xFFFF;
+ return (htons(sum));
}
/*
* Fill a packet with some payload.
+ * We create a UDP packet so the payload starts at
+ * 14+20+8 = 42 bytes.
*/
+#ifdef __linux__
+#define uh_sport source
+#define uh_dport dest
+#define uh_ulen len
+#define uh_sum check
+#endif /* __linux__ */
static void
initialize_packet(struct targ *targ)
{
@@ -335,9 +361,8 @@ initialize_packet(struct targ *targ)
struct ether_header *eh;
struct ip *ip;
struct udphdr *udp;
- uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(*ip);
+ uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip);
int i, l, l0 = strlen(default_payload);
- char *p;
for (i = 0; i < paylen;) {
l = min(l0, paylen - i);
@@ -345,14 +370,8 @@ initialize_packet(struct targ *targ)
i += l;
}
pkt->body[i-1] = '\0';
-
- udp = &pkt->udp;
- udp->uh_sport = htons(1234);
- udp->uh_dport = htons(4321);
- udp->uh_ulen = htons(paylen);
- udp->uh_sum = 0; // checksum(udp, sizeof(*udp));
-
ip = &pkt->ip;
+
ip->ip_v = IPVERSION;
ip->ip_hl = 5;
ip->ip_id = 0;
@@ -362,29 +381,36 @@ initialize_packet(struct targ *targ)
ip->ip_off = htons(IP_DF); /* Don't fragment */
ip->ip_ttl = IPDEFTTL;
ip->ip_p = IPPROTO_UDP;
- inet_aton(targ->g->src_ip, (struct in_addr *)&ip->ip_src);
- inet_aton(targ->g->dst_ip, (struct in_addr *)&ip->ip_dst);
- targ->dst_ip = ip->ip_dst.s_addr;
- targ->src_ip = ip->ip_src.s_addr;
- p = index(targ->g->src_ip, '-');
- if (p) {
- targ->dst_ip_range = atoi(p+1);
- D("dst-ip sweep %d addresses", targ->dst_ip_range);
- }
- ip->ip_sum = checksum(ip, sizeof(*ip));
+ ip->ip_dst.s_addr = targ->g->dst_ip.cur.s_addr;
+ if (++targ->g->dst_ip.cur.s_addr > targ->g->dst_ip.end.s_addr)
+ targ->g->dst_ip.cur.s_addr = targ->g->dst_ip.start.s_addr;
+ ip->ip_src.s_addr = targ->g->src_ip.cur.s_addr;
+ if (++targ->g->src_ip.cur.s_addr > targ->g->src_ip.end.s_addr)
+ targ->g->src_ip.cur.s_addr = targ->g->src_ip.start.s_addr;
+ ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0));
- eh = &pkt->eh;
- bcopy(ether_aton(targ->g->src_mac), targ->src_mac, 6);
- bcopy(targ->src_mac, eh->ether_shost, 6);
- p = index(targ->g->src_mac, '-');
- if (p)
- targ->src_mac_range = atoi(p+1);
- bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6);
- bcopy(targ->dst_mac, eh->ether_dhost, 6);
- p = index(targ->g->dst_mac, '-');
- if (p)
- targ->dst_mac_range = atoi(p+1);
+ udp = &pkt->udp;
+ udp->uh_sport = htons(targ->g->src_ip.cur_p);
+ if (++targ->g->src_ip.cur_p > targ->g->src_ip.port1)
+ targ->g->src_ip.cur_p = targ->g->src_ip.port0;
+ udp->uh_dport = htons(targ->g->dst_ip.cur_p);
+ if (++targ->g->dst_ip.cur_p > targ->g->dst_ip.port1)
+ targ->g->dst_ip.cur_p = targ->g->dst_ip.port0;
+ udp->uh_ulen = htons(paylen);
+ /* Magic: taken from sbin/dhclient/packet.c */
+ udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp),
+ checksum(pkt->body,
+ paylen - sizeof(*udp),
+ checksum(&ip->ip_src, 2 * sizeof(ip->ip_src),
+ IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen)
+ )
+ )
+ ));
+
+ eh = &pkt->eh;
+ bcopy(&targ->g->src_mac.start, eh->ether_shost, 6);
+ bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6);
eh->ether_type = htons(ETHERTYPE_IP);
}
@@ -452,7 +478,6 @@ send_packets(struct netmap_ring *ring, struct pkt *pkt,
memcpy(p, pkt, size);
else if (options & OPT_PREFETCH)
prefetch(p);
-
slot->len = size;
if (sent == count - 1)
slot->flags |= NS_REPORT;
@@ -464,23 +489,198 @@ send_packets(struct netmap_ring *ring, struct pkt *pkt,
return (sent);
}
+/*
+ * Send a packet, and wait for a response.
+ * The payload (after the UDP header, offset 42) carries a 4-byte
+ * sequence number followed by a struct timespec send timestamp.
+ */
+#define PAY_OFS 42 /* where in the pkt... */
+
+static void *
+pinger_body(void *data)
+{
+ struct targ *targ = (struct targ *) data;
+ struct pollfd fds[1];
+ struct netmap_if *nifp = targ->nifp;
+ int i, rx = 0, n = targ->g->npackets;
+
+ fds[0].fd = targ->fd;
+ fds[0].events = (POLLIN);
+ static uint32_t sent;
+ struct timespec ts, now, last_print;
+ uint32_t count = 0, min = 1000000000, av = 0;
+
+ if (targ->g->nthreads > 1) {
+ D("can only ping with 1 thread");
+ return NULL;
+ }
+
+ clock_gettime(CLOCK_REALTIME_PRECISE, &last_print);
+ while (n == 0 || (int)sent < n) {
+ struct netmap_ring *ring = NETMAP_TXRING(nifp, 0);
+ struct netmap_slot *slot;
+ char *p;
+ for (i = 0; i < 1; i++) {
+ slot = &ring->slot[ring->cur];
+ slot->len = targ->g->pkt_size;
+ p = NETMAP_BUF(ring, slot->buf_idx);
+
+ if (ring->avail == 0) {
+ D("-- ouch, cannot send");
+ } else {
+ pkt_copy(&targ->pkt, p, targ->g->pkt_size);
+ clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
+ bcopy(&sent, p+42, sizeof(sent));
+ bcopy(&ts, p+46, sizeof(ts));
+ sent++;
+ ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
+ ring->avail--;
+ }
+ }
+ /* should use a parameter to decide how often to send */
+ if (poll(fds, 1, 3000) <= 0) {
+ D("poll error/timeout on queue %d", targ->me);
+ continue;
+ }
+ /* see what we got back */
+ for (i = targ->qfirst; i < targ->qlast; i++) {
+ ring = NETMAP_RXRING(nifp, i);
+ while (ring->avail > 0) {
+ uint32_t seq;
+ slot = &ring->slot[ring->cur];
+ p = NETMAP_BUF(ring, slot->buf_idx);
+
+ clock_gettime(CLOCK_REALTIME_PRECISE, &now);
+ bcopy(p+42, &seq, sizeof(seq));
+ bcopy(p+46, &ts, sizeof(ts));
+ ts.tv_sec = now.tv_sec - ts.tv_sec;
+ ts.tv_nsec = now.tv_nsec - ts.tv_nsec;
+ if (ts.tv_nsec < 0) {
+ ts.tv_nsec += 1000000000;
+ ts.tv_sec--;
+ }
+ if (1) D("seq %d/%d delta %d.%09d", seq, sent,
+ (int)ts.tv_sec, (int)ts.tv_nsec);
+ if (ts.tv_nsec < (int)min)
+ min = ts.tv_nsec;
+ count ++;
+ av += ts.tv_nsec;
+ ring->avail--;
+ ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
+ rx++;
+ }
+ }
+ //D("tx %d rx %d", sent, rx);
+ //usleep(100000);
+ ts.tv_sec = now.tv_sec - last_print.tv_sec;
+ ts.tv_nsec = now.tv_nsec - last_print.tv_nsec;
+ if (ts.tv_nsec < 0) {
+ ts.tv_nsec += 1000000000;
+ ts.tv_sec--;
+ }
+ if (ts.tv_sec >= 1) {
+ D("count %d min %d av %d",
+ count, min, av/count);
+ count = 0;
+ av = 0;
+ min = 100000000;
+ last_print = now;
+ }
+ }
+ return NULL;
+}
+
+
+/*
+ * reply to ping requests
+ */
+static void *
+ponger_body(void *data)
+{
+ struct targ *targ = (struct targ *) data;
+ struct pollfd fds[1];
+ struct netmap_if *nifp = targ->nifp;
+ struct netmap_ring *txring, *rxring;
+ int i, rx = 0, sent = 0, n = targ->g->npackets;
+ fds[0].fd = targ->fd;
+ fds[0].events = (POLLIN);
+
+ if (targ->g->nthreads > 1) {
+ D("can only reply ping with 1 thread");
+ return NULL;
+ }
+ D("understood ponger %d but don't know how to do it", n);
+ while (n == 0 || sent < n) {
+ uint32_t txcur, txavail;
+//#define BUSYWAIT
+#ifdef BUSYWAIT
+ ioctl(fds[0].fd, NIOCRXSYNC, NULL);
+#else
+ if (poll(fds, 1, 1000) <= 0) {
+ D("poll error/timeout on queue %d", targ->me);
+ continue;
+ }
+#endif
+ txring = NETMAP_TXRING(nifp, 0);
+ txcur = txring->cur;
+ txavail = txring->avail;
+ /* see what we got back */
+ for (i = targ->qfirst; i < targ->qlast; i++) {
+ rxring = NETMAP_RXRING(nifp, i);
+ while (rxring->avail > 0) {
+ uint16_t *spkt, *dpkt;
+ uint32_t cur = rxring->cur;
+ struct netmap_slot *slot = &rxring->slot[cur];
+ char *src, *dst;
+ src = NETMAP_BUF(rxring, slot->buf_idx);
+ //D("got pkt %p of size %d", src, slot->len);
+ rxring->avail--;
+ rxring->cur = NETMAP_RING_NEXT(rxring, cur);
+ rx++;
+ if (txavail == 0)
+ continue;
+ dst = NETMAP_BUF(txring,
+ txring->slot[txcur].buf_idx);
+ /* copy... */
+ dpkt = (uint16_t *)dst;
+ spkt = (uint16_t *)src;
+ pkt_copy(src, dst, slot->len);
+ dpkt[0] = spkt[3];
+ dpkt[1] = spkt[4];
+ dpkt[2] = spkt[5];
+ dpkt[3] = spkt[0];
+ dpkt[4] = spkt[1];
+ dpkt[5] = spkt[2];
+ txring->slot[txcur].len = slot->len;
+ /* XXX swap src dst mac */
+ txcur = NETMAP_RING_NEXT(txring, txcur);
+ txavail--;
+ sent++;
+ }
+ }
+ txring->cur = txcur;
+ txring->avail = txavail;
+ targ->count = sent;
+#ifdef BUSYWAIT
+ ioctl(fds[0].fd, NIOCTXSYNC, NULL);
+#endif
+ //D("tx %d rx %d", sent, rx);
+ }
+ return NULL;
+}
+
+
static void *
sender_body(void *data)
{
struct targ *targ = (struct targ *) data;
+
struct pollfd fds[1];
struct netmap_if *nifp = targ->nifp;
struct netmap_ring *txring;
- int i, pkts_per_td = targ->g->npackets / targ->g->nthreads, sent = 0;
- int continuous = 0;
+ int i, n = targ->g->npackets / targ->g->nthreads, sent = 0;
int options = targ->g->options | OPT_COPY;
- int retval;
-
D("start");
- if (pkts_per_td == 0) {
- continuous = 1;
- pkts_per_td = 100000;
- }
if (setaffinity(targ->thread, targ->affinity))
goto quit;
/* setup poll(2) mechanism. */
@@ -490,45 +690,56 @@ D("start");
/* main loop.*/
gettimeofday(&targ->tic, NULL);
- if (targ->g->use_pcap) {
- int size = targ->g->pkt_size;
- void *pkt = &targ->pkt;
- pcap_t *p = targ->g->p;
- for (i = 0; (sent < pkts_per_td && !targ->cancel) || continuous; i++) {
+ if (targ->g->dev_type == DEV_PCAP) {
+ int size = targ->g->pkt_size;
+ void *pkt = &targ->pkt;
+ pcap_t *p = targ->g->p;
+
+ for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
if (pcap_inject(p, pkt, size) != -1)
sent++;
if (i > 10000) {
targ->count = sent;
i = 0;
}
- }
+ }
+ } else if (targ->g->dev_type == DEV_TAP) { /* tap */
+ int size = targ->g->pkt_size;
+ void *pkt = &targ->pkt;
+ D("writing to file desc %d", targ->g->main_fd);
+
+ for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
+ if (write(targ->g->main_fd, pkt, size) != -1)
+ sent++;
+ if (i > 10000) {
+ targ->count = sent;
+ i = 0;
+ }
+ }
} else {
- while (sent < pkts_per_td || continuous) {
+ while (!targ->cancel && (n == 0 || sent < n)) {
/*
* wait for available room in the send queue(s)
*/
- if ((retval = poll(fds, 1, 2000)) <= 0) {
+ if (poll(fds, 1, 2000) <= 0) {
if (targ->cancel)
break;
- if (retval == 0)
- D("poll timeout on queue %d\n", targ->me);
- else
- D("poll error on queue %d: %s\n", targ->me,
- strerror(errno));
+ D("poll error/timeout on queue %d", targ->me);
goto quit;
}
/*
* scan our queues and send on those with room
*/
- if (sent > 100000 && !(targ->g->options & OPT_COPY) )
+ if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) {
+ D("drop copy");
options &= ~OPT_COPY;
- for (i = targ->qfirst; i < targ->qlast && !targ->cancel; i++) {
+ }
+ for (i = targ->qfirst; i < targ->qlast; i++) {
int m, limit = targ->g->burst;
- if (!continuous && pkts_per_td - sent < limit)
- limit = pkts_per_td - sent;
-
+ if (n > 0 && n - sent < limit)
+ limit = n - sent;
txring = NETMAP_TXRING(nifp, i);
if (txring->avail == 0)
continue;
@@ -537,8 +748,6 @@ D("start");
sent += m;
targ->count = sent;
}
- if (targ->cancel)
- break;
}
/* flush any remaining packets */
ioctl(fds[0].fd, NIOCTXSYNC, NULL);
@@ -566,10 +775,12 @@ quit:
static void
-receive_pcap(u_char *user, __unused const struct pcap_pkthdr * h,
- __unused const u_char * bytes)
+receive_pcap(u_char *user, const struct pcap_pkthdr * h,
+ const u_char * bytes)
{
int *count = (int *)user;
+ (void)h; /* UNUSED */
+ (void)bytes; /* UNUSED */
(*count)++;
}
@@ -603,7 +814,8 @@ receiver_body(void *data)
struct pollfd fds[1];
struct netmap_if *nifp = targ->nifp;
struct netmap_ring *rxring;
- int i, received = 0;
+ int i;
+ uint64_t received = 0;
if (setaffinity(targ->thread, targ->affinity))
goto quit;
@@ -614,7 +826,7 @@ receiver_body(void *data)
fds[0].events = (POLLIN);
/* unbounded wait for the first packet. */
- while (!targ->cancel) {
+ for (;;) {
i = poll(fds, 1, 1000);
if (i > 0 && !(fds[0].revents & POLLERR))
break;
@@ -623,15 +835,24 @@ receiver_body(void *data)
/* main loop, exit after 1s silence */
gettimeofday(&targ->tic, NULL);
- if (targ->g->use_pcap) {
+ if (targ->g->dev_type == DEV_PCAP) {
while (!targ->cancel) {
+ /* XXX should we poll ? */
pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL);
}
+ } else if (targ->g->dev_type == DEV_TAP) {
+ D("reading from %s fd %d", targ->g->ifname, targ->g->main_fd);
+ while (!targ->cancel) {
+ char buf[2048];
+ /* XXX should we poll ? */
+ if (read(targ->g->main_fd, buf, sizeof(buf)) > 0)
+ targ->count++;
+ }
} else {
while (!targ->cancel) {
/* Once we started to receive packets, wait at most 1 seconds
before quitting. */
- if (poll(fds, 1, 1 * 1000) <= 0) {
+ if (poll(fds, 1, 1 * 1000) <= 0 && targ->g->forever == 0) {
gettimeofday(&targ->toc, NULL);
targ->toc.tv_sec -= 1; /* Subtract timeout time. */
break;
@@ -647,8 +868,8 @@ receiver_body(void *data)
m = receive_packets(rxring, targ->g->burst,
SKIP_PAYLOAD);
received += m;
- targ->count = received;
}
+ targ->count = received;
// tell the card we have read the data
//ioctl(fds[0].fd, NIOCRXSYNC, NULL);
@@ -665,59 +886,55 @@ quit:
return (NULL);
}
-static char *
-scaled_val(double val)
+/* very crude code to print a number in normalized form.
+ * Caller has to make sure that the buffer is large enough.
+ */
+static const char *
+norm(char *buf, double val)
{
- static char buf[64];
- const char *units[] = {"", "K", "M", "G"};
- int i = 0;
+ char *units[] = { "", "K", "M", "G" };
+ u_int i;
- while (val >= 1000 && i < 3) {
+ for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *); i++)
val /= 1000;
- i++;
- }
- snprintf(buf, sizeof(buf), "%.2f%s", val, units[i]);
- return (buf);
+ sprintf(buf, "%.2f %s", val, units[i]);
+ return buf;
}
static void
tx_output(uint64_t sent, int size, double delta)
{
- uint64_t bytes_sent = sent * size;
- double bw = 8.0 * bytes_sent / delta;
- double pps = sent / delta;
- /*
- * Assume Ethernet overhead of 24 bytes per packet excluding header:
- * FCS 4 bytes
- * Preamble 8 bytes
- * IFG 12 bytes
- */
- double bw_with_overhead = 8.0 * (bytes_sent + sent * 24) / delta;
+ double bw, raw_bw, pps;
+ char b1[40], b2[80], b3[80];
printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n",
sent, size, delta);
- printf("Speed: %spps. ", scaled_val(pps));
- printf("Bandwidth: %sbps ", scaled_val(bw));
- printf("(%sbps with overhead).\n", scaled_val(bw_with_overhead));
-
+ if (delta == 0)
+ delta = 1e-6;
+ if (size < 60) /* correct for min packet size */
+ size = 60;
+ pps = sent / delta;
+ bw = (8.0 * size * sent) / delta;
+ /* raw packets have 4 bytes crc + 20 bytes framing */
+ raw_bw = (8.0 * (size + 24) * sent) / delta;
+
+ printf("Speed: %spps Bandwidth: %sbps (raw %sbps)\n",
+ norm(b1, pps), norm(b2, bw), norm(b3, raw_bw) );
}
static void
rx_output(uint64_t received, double delta)
{
-
- double pps = received / delta;
- char units[4] = { '\0', 'K', 'M', 'G' };
- int punit = 0;
-
- while (pps >= 1000) {
- pps /= 1000;
- punit += 1;
- }
+ double pps;
+ char b1[40];
printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta);
- printf("Speed: %.2f%cpps.\n", pps, units[punit]);
+
+ if (delta == 0)
+ delta = 1e-6;
+ pps = received / delta;
+ printf("Speed: %spps\n", norm(b1, pps));
}
static void
@@ -728,17 +945,21 @@ usage(void)
"Usage:\n"
"%s arguments\n"
"\t-i interface interface name\n"
- "\t-t pkts_to_send also forces send mode, 0 = continuous\n"
- "\t-r pkts_to_receive also forces receive mode\n"
+ "\t-f function tx rx ping pong\n"
+ "\t-n count number of iterations (can be 0)\n"
+ "\t-t pkts_to_send also forces tx mode\n"
+ "\t-r pkts_to_receive also forces rx mode\n"
"\t-l pkts_size in bytes excluding CRC\n"
"\t-d dst-ip end with %%n to sweep n addresses\n"
"\t-s src-ip end with %%n to sweep n addresses\n"
"\t-D dst-mac end with %%n to sweep n addresses\n"
"\t-S src-mac end with %%n to sweep n addresses\n"
+ "\t-a cpu_id use setaffinity\n"
"\t-b burst size testing, mostly\n"
"\t-c cores cores to use\n"
"\t-p threads processes/threads to use\n"
"\t-T report_ms milliseconds between reports\n"
+ "\t-P use libpcap instead of netmap\n"
"\t-w wait_for_link_time in seconds\n"
"",
cmd);
@@ -746,71 +967,341 @@ usage(void)
exit(0);
}
+/*
+ * start_threads: allocate the global targs[] array (one slot per worker)
+ * and start g->nthreads threads, each running g->td_body on its own slot.
+ *
+ * With DEV_NETMAP every worker opens a private /dev/netmap descriptor and
+ * registers g->ifname on it; with the other device types the workers
+ * share g->main_fd.  A slot whose setup fails is left with used == 0 and
+ * no thread is created for it.
+ *
+ * NOTE(review): main_thread() calls pthread_join() on every slot, also on
+ * those for which no thread was created here -- confirm this is harmless
+ * on the target platforms or track thread creation explicitly.
+ */
+static void
+start_threads(struct glob_arg *g)
+{
+	int i;
+
+	/* calloc() hands back zeroed slots, no further clearing is needed */
+	targs = calloc(g->nthreads, sizeof(*targs));
+	if (targs == NULL) {
+		D("Unable to allocate %d thread descriptors", g->nthreads);
+		exit(1);
+	}
+	/*
+	 * Now create the desired number of threads, each one
+	 * using a single descriptor.
+	 */
+	for (i = 0; i < g->nthreads; i++) {
+		targs[i].fd = -1; /* default, with pcap */
+		targs[i].g = g;
+
+		if (g->dev_type == DEV_NETMAP) {
+			struct nmreq tifreq;
+			int tfd;
+
+			/* register interface. */
+			tfd = open("/dev/netmap", O_RDWR);
+			if (tfd == -1) {
+				D("Unable to open /dev/netmap");
+				continue;
+			}
+			targs[i].fd = tfd;
+
+			bzero(&tifreq, sizeof(tifreq));
+			/*
+			 * Copy at most size-1 bytes: the structure was just
+			 * zeroed, so the name is always NUL-terminated.
+			 */
+			strncpy(tifreq.nr_name, g->ifname,
+			    sizeof(tifreq.nr_name) - 1);
+			tifreq.nr_version = NETMAP_API;
+			/* one hardware ring per thread when running multi-threaded */
+			tifreq.nr_ringid = (g->nthreads > 1) ? (i | NETMAP_HW_RING) : 0;
+
+			/*
+			 * if we are acting as a receiver only, do not touch the transmit ring.
+			 * This is not the default because many apps may use the interface
+			 * in both directions, but a pure receiver does not.
+			 */
+			if (g->td_body == receiver_body) {
+				tifreq.nr_ringid |= NETMAP_NO_TX_POLL;
+			}
+
+			if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) {
+				D("Unable to register %s", g->ifname);
+				continue;
+			}
+			targs[i].nmr = tifreq;
+			targs[i].nifp = NETMAP_IF(g->mmap_addr, tifreq.nr_offset);
+			/* start threads. */
+			targs[i].qfirst = (g->nthreads > 1) ? i : 0;
+			targs[i].qlast = (g->nthreads > 1) ? i+1 :
+				(g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings);
+		} else {
+			targs[i].fd = g->main_fd;
+		}
+		targs[i].used = 1;
+		targs[i].me = i;
+		if (g->affinity >= 0) {
+			/* pin to the requested cpu if it exists, else spread round-robin */
+			if (g->affinity < g->cpus)
+				targs[i].affinity = g->affinity;
+			else
+				targs[i].affinity = i % g->cpus;
+		} else
+			targs[i].affinity = -1;
+		/* default, init packets */
+		initialize_packet(&targs[i]);
+
+		/*
+		 * pthread_create() returns 0 on success and a positive error
+		 * number on failure (never -1), so test against 0.
+		 */
+		if (pthread_create(&targs[i].thread, NULL, g->td_body,
+		    &targs[i]) != 0) {
+			D("Unable to create thread %d", i);
+			targs[i].used = 0;
+		}
+	}
+}
+
+/*
+ * main_thread: report progress while the workers run, then collect the
+ * results.
+ *
+ * Phase 1 sleeps g->report_interval milliseconds at a time and prints the
+ * aggregate packet rate since the previous report; it ends once every
+ * targs[] slot has used == 0.
+ * Phase 2 joins the threads, closes their descriptors, sums the per-thread
+ * counters and prints the totals over the span from the earliest tic to
+ * the latest toc.  For DEV_NETMAP the main descriptor is finally
+ * unregistered, unmapped and closed.
+ */
+static void
+main_thread(struct glob_arg *g)
+{
+	int i;
+
+	uint64_t prev = 0;	/* aggregate count at the previous report */
+	uint64_t count = 0;	/* grand total, filled in after the joins */
+	double delta_t;
+	struct timeval tic, toc;
+
+	gettimeofday(&toc, NULL);	/* toc: time of the last report */
+	for (;;) {
+		struct timeval now, delta, elapsed;
+		uint64_t pps, usec, my_count, npkts;
+		int done = 0;
+
+		delta.tv_sec = g->report_interval/1000;
+		delta.tv_usec = (g->report_interval%1000)*1000;
+		select(0, NULL, NULL, NULL, &delta);	/* plain sleep */
+		gettimeofday(&now, NULL);
+		time_second = now.tv_sec;
+		/*
+		 * Store the interval in its own variable: toc must remain
+		 * the timestamp of the last report, also when the short
+		 * interval check below skips this round.
+		 */
+		timersub(&now, &toc, &elapsed);
+		my_count = 0;
+		for (i = 0; i < g->nthreads; i++) {
+			my_count += targs[i].count;
+			if (targs[i].used == 0)
+				done++;
+		}
+		/* widen before multiplying so the product cannot overflow */
+		usec = (uint64_t)elapsed.tv_sec * 1000000 + elapsed.tv_usec;
+		if (usec < 10000)	/* too short for a meaningful rate */
+			continue;
+		npkts = my_count - prev;
+		pps = (npkts*1000000 + usec/2) / usec;	/* rounded to nearest */
+		D("%" PRIu64 " pps (%" PRIu64 " pkts in %" PRIu64 " usec)",
+			pps, npkts, usec);
+		prev = my_count;
+		toc = now;
+		if (done == g->nthreads)
+			break;
+	}
+
+	timerclear(&tic);
+	timerclear(&toc);
+	for (i = 0; i < g->nthreads; i++) {
+		/*
+		 * Join active threads, unregister interfaces and close
+		 * file descriptors.
+		 */
+		pthread_join(targs[i].thread, NULL);
+		close(targs[i].fd);
+
+		if (targs[i].completed == 0)
+			D("ouch, thread %d exited with error", i);
+
+		/*
+		 * Collect threads output and extract information about
+		 * how long it took to send all the packets.
+		 */
+		count += targs[i].count;
+		if (!timerisset(&tic) || timercmp(&targs[i].tic, &tic, <))
+			tic = targs[i].tic;
+		if (!timerisset(&toc) || timercmp(&targs[i].toc, &toc, >))
+			toc = targs[i].toc;
+	}
+
+	/* print output. */
+	timersub(&toc, &tic, &toc);
+	delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
+	if (g->td_body == sender_body)
+		tx_output(count, g->pkt_size, delta_t);
+	else
+		rx_output(count, delta_t);
+
+	if (g->dev_type == DEV_NETMAP) {
+		ioctl(g->main_fd, NIOCUNREGIF, NULL); // XXX deprecated
+		munmap(g->mmap_addr, g->mmap_size);
+		close(g->main_fd);
+	}
+}
+
+
+/*
+ * One entry of the table that maps a function keyword (the argument of
+ * the -f option) to the thread body implementing it.
+ */
+struct sf {
+	const char *key;	/* keyword; NULL marks the end of the table */
+	void *(*f)(void *);	/* thread start routine, as taken by pthread_create() */
+};
+
+/* Functions selectable with -f; the entry with a NULL key ends the table. */
+static struct sf func[] = {
+	{ .key = "tx",   .f = sender_body },
+	{ .key = "rx",   .f = receiver_body },
+	{ .key = "ping", .f = pinger_body },
+	{ .key = "pong", .f = ponger_body },
+	{ .key = NULL,   .f = NULL }
+};
+
+/*
+ * tap_alloc: open a tap device and return its file descriptor.
+ *
+ * dev	name of the tap interface.  On FreeBSD a name longer than "tap"
+ *	selects /dev/<dev>, otherwise TAP_CLONEDEV is opened.  On linux
+ *	the name is passed to TUNSETIFF (an empty name lets the kernel
+ *	pick one) and the name actually assigned is written back to dev.
+ *
+ * Returns the descriptor the caller uses to talk to the virtual
+ * interface, or a negative value if open() or TUNSETIFF fails.
+ */
+static int
+tap_alloc(char *dev)
+{
+	struct ifreq ifr;
+	int fd, err;
+	char *clonedev = TAP_CLONEDEV;
+
+	/*
+	 * err is only used in the linux section and dev only in the
+	 * platform-specific ones: keep the compiler quiet elsewhere.
+	 */
+	(void)err;
+	(void)dev;
+
+#ifdef __FreeBSD__
+	/* main() only gets here with names starting with "tap", so dev[3] exists */
+	if (dev[3]) { /* tapSomething */
+		static char buf[128];
+		snprintf(buf, sizeof(buf), "/dev/%s", dev);
+		clonedev = buf;
+	}
+#endif
+	/* open the device */
+	if( (fd = open(clonedev, O_RDWR)) < 0 ) {
+		return fd;
+	}
+	D("%s open successful", clonedev);
+
+	/* preparation of the struct ifr, of type "struct ifreq" */
+	memset(&ifr, 0, sizeof(ifr));
+
+#ifdef linux
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+	if (*dev) {
+		/* if a device name was specified, put it in the structure; otherwise,
+		 * the kernel will try to allocate the "next" device of the
+		 * specified type.
+		 * Copy at most IFNAMSIZ-1 bytes: ifr was zeroed above, so
+		 * the name stays NUL-terminated even when dev is too long. */
+		strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);
+	}
+
+	/* try to create the device */
+	if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) {
+		D("failed to do a TUNSETIFF");
+		close(fd);
+		return err;
+	}
+
+	/* if the operation was successful, write back the name of the
+	 * interface to the variable "dev", so the caller can know
+	 * it. Note that the caller MUST reserve space in *dev.
+	 * NOTE(review): main() passes optarg here; a kernel-chosen name
+	 * longer than the string typed by the user overruns it -- confirm
+	 * and give the caller a buffer of IFNAMSIZ bytes. */
+	strcpy(dev, ifr.ifr_name);
+	D("new name is %s", dev);
+#endif /* linux */
+
+	/* this is the special file descriptor that the caller will use to talk
+	 * with the virtual interface */
+	return fd;
+}
int
main(int arc, char **argv)
{
- int i, fd;
- char pcap_errbuf[PCAP_ERRBUF_SIZE];
+ int i;
struct glob_arg g;
struct nmreq nmr;
- void *mmap_addr; /* the mmap address */
- void *(*td_body)(void *) = receiver_body;
int ch;
- int report_interval = 1000; /* report interval */
- char *ifname = NULL;
int wait_link = 2;
int devqueues = 1; /* how many device queues */
bzero(&g, sizeof(g));
- g.src_ip = "10.0.0.1";
- g.dst_ip = "10.1.0.1";
- g.dst_mac = "ff:ff:ff:ff:ff:ff";
- g.src_mac = NULL;
+ g.main_fd = -1;
+ g.td_body = receiver_body;
+ g.report_interval = 1000; /* report interval */
+ g.affinity = -1;
+ /* ip addresses can also be a range x.x.x.x-x.x.x.y */
+ g.src_ip.name = "10.0.0.1";
+ g.dst_ip.name = "10.1.0.1";
+ g.dst_mac.name = "ff:ff:ff:ff:ff:ff";
+ g.src_mac.name = NULL;
g.pkt_size = 60;
g.burst = 512; // default
g.nthreads = 1;
g.cpus = 1;
while ( (ch = getopt(arc, argv,
- "i:t:r:l:d:s:D:S:b:c:o:p:PT:w:v")) != -1) {
+ "a:f:n:i:t:r:l:d:s:D:S:b:c:o:p:PT:w:Wv")) != -1) {
+ struct sf *fn;
+
switch(ch) {
default:
D("bad option %c %s", ch, optarg);
usage();
break;
- case 'o':
+
+ case 'n':
+ g.npackets = atoi(optarg);
+ break;
+
+ case 'f':
+ for (fn = func; fn->key; fn++) {
+ if (!strcmp(fn->key, optarg))
+ break;
+ }
+ if (fn->key)
+ g.td_body = fn->f;
+ else
+ D("unrecognised function %s", optarg);
+ break;
+
+ case 'o': /* data generation options */
g.options = atoi(optarg);
break;
+
+ case 'a': /* force affinity */
+ g.affinity = atoi(optarg);
+ break;
+
case 'i': /* interface */
- ifname = optarg;
+ g.ifname = optarg;
+ if (!strncmp(optarg, "tap", 3))
+ g.dev_type = DEV_TAP;
+ else
+ g.dev_type = DEV_NETMAP;
break;
- case 't': /* send */
- td_body = sender_body;
+
+ case 't': /* send, deprecated */
+ D("-t deprecated, please use -f tx -n %s", optarg);
+ g.td_body = sender_body;
g.npackets = atoi(optarg);
break;
+
case 'r': /* receive */
- td_body = receiver_body;
+ D("-r deprecated, please use -f rx -n %s", optarg);
+ g.td_body = receiver_body;
g.npackets = atoi(optarg);
break;
+
case 'l': /* pkt_size */
g.pkt_size = atoi(optarg);
break;
+
case 'd':
- g.dst_ip = optarg;
+ g.dst_ip.name = optarg;
break;
+
case 's':
- g.src_ip = optarg;
+ g.src_ip.name = optarg;
break;
+
case 'T': /* report interval */
- report_interval = atoi(optarg);
+ g.report_interval = atoi(optarg);
break;
+
case 'w':
wait_link = atoi(optarg);
break;
+
+ case 'W':
+ g.forever = 1; /* do not exit rx even with no traffic */
+ break;
+
case 'b': /* burst */
g.burst = atoi(optarg);
break;
@@ -822,61 +1313,71 @@ main(int arc, char **argv)
break;
case 'P':
- g.use_pcap = 1;
+ g.dev_type = DEV_PCAP;
break;
case 'D': /* destination mac */
- g.dst_mac = optarg;
- {
- struct ether_addr *mac = ether_aton(g.dst_mac);
- D("ether_aton(%s) gives %p", g.dst_mac, mac);
- }
+ g.dst_mac.name = optarg;
break;
+
case 'S': /* source mac */
- g.src_mac = optarg;
+ g.src_mac.name = optarg;
break;
case 'v':
verbose++;
}
}
- if (ifname == NULL) {
+ if (g.ifname == NULL) {
D("missing ifname");
usage();
}
- {
- int n = system_ncpus();
- if (g.cpus < 0 || g.cpus > n) {
- D("%d cpus is too high, have only %d cpus", g.cpus, n);
- usage();
- }
- if (g.cpus == 0)
- g.cpus = n;
+
+ i = system_ncpus();
+ if (g.cpus < 0 || g.cpus > i) {
+ D("%d cpus is too high, have only %d cpus", g.cpus, i);
+ usage();
}
+ if (g.cpus == 0)
+ g.cpus = i;
+
if (g.pkt_size < 16 || g.pkt_size > 1536) {
D("bad pktsize %d\n", g.pkt_size);
usage();
}
- if (td_body == sender_body && g.src_mac == NULL) {
- static char mybuf[20] = "ff:ff:ff:ff:ff:ff";
+ if (g.src_mac.name == NULL) {
+ static char mybuf[20] = "00:00:00:00:00:00";
/* retrieve source mac address. */
- if (source_hwaddr(ifname, mybuf) == -1) {
+ if (source_hwaddr(g.ifname, mybuf) == -1) {
D("Unable to retrieve source mac");
// continue, fail later
}
- g.src_mac = mybuf;
+ g.src_mac.name = mybuf;
}
+ /* extract address ranges */
+ extract_ip_range(&g.src_ip);
+ extract_ip_range(&g.dst_ip);
+ extract_mac_range(&g.src_mac);
+ extract_mac_range(&g.dst_mac);
+
+ if (g.dev_type == DEV_TAP) {
+ D("want to use tap %s", g.ifname);
+ g.main_fd = tap_alloc(g.ifname);
+ if (g.main_fd < 0) {
+ D("cannot open tap %s", g.ifname);
+ usage();
+ }
+ } else if (g.dev_type > DEV_NETMAP) {
+ char pcap_errbuf[PCAP_ERRBUF_SIZE];
- if (g.use_pcap) {
- D("using pcap on %s", ifname);
- g.p = pcap_open_live(ifname, 0, 1, 100, pcap_errbuf);
+ D("using pcap on %s", g.ifname);
+ pcap_errbuf[0] = '\0'; // init the buffer
+ g.p = pcap_open_live(g.ifname, 0, 1, 100, pcap_errbuf);
if (g.p == NULL) {
- D("cannot open pcap on %s", ifname);
+ D("cannot open pcap on %s", g.ifname);
usage();
}
- mmap_addr = NULL;
- fd = -1;
} else {
bzero(&nmr, sizeof(nmr));
nmr.nr_version = NETMAP_API;
@@ -889,21 +1390,21 @@ main(int arc, char **argv)
* which in turn may take some time for the PHY to
* reconfigure.
*/
- fd = open("/dev/netmap", O_RDWR);
- if (fd == -1) {
+ g.main_fd = open("/dev/netmap", O_RDWR);
+ if (g.main_fd == -1) {
D("Unable to open /dev/netmap");
- exit(1);
+ // fail later
} else {
- if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) {
+ if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) {
D("Unable to get if info without name");
} else {
D("map size is %d Kb", nmr.nr_memsize >> 10);
}
bzero(&nmr, sizeof(nmr));
nmr.nr_version = NETMAP_API;
- strncpy(nmr.nr_name, ifname, sizeof(nmr.nr_name));
- if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) {
- D("Unable to get if info for %s", ifname);
+ strncpy(nmr.nr_name, g.ifname, sizeof(nmr.nr_name));
+ if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) {
+ D("Unable to get if info for %s", g.ifname);
}
devqueues = nmr.nr_rx_rings;
}
@@ -919,11 +1420,12 @@ main(int arc, char **argv)
* inside the body of the threads, we prefer to keep this
* operation here to simplify the thread logic.
*/
- D("mmapping %d Kbytes", nmr.nr_memsize>>10);
- mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize,
+ D("mapping %d Kbytes", nmr.nr_memsize>>10);
+ g.mmap_size = nmr.nr_memsize;
+ g.mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize,
PROT_WRITE | PROT_READ,
- MAP_SHARED, fd, 0);
- if (mmap_addr == MAP_FAILED) {
+ MAP_SHARED, g.main_fd, 0);
+ if (g.mmap_addr == MAP_FAILED) {
D("Unable to mmap %d KB", nmr.nr_memsize >> 10);
// continue, fail later
}
@@ -937,8 +1439,8 @@ main(int arc, char **argv)
* give time to cards that take a long time to reset the PHY.
*/
nmr.nr_version = NETMAP_API;
- if (ioctl(fd, NIOCREGIF, &nmr) == -1) {
- D("Unable to register interface %s", ifname);
+ if (ioctl(g.main_fd, NIOCREGIF, &nmr) == -1) {
+ D("Unable to register interface %s", g.ifname);
//continue, fail later
}
@@ -946,19 +1448,19 @@ main(int arc, char **argv)
/* Print some debug information. */
fprintf(stdout,
"%s %s: %d queues, %d threads and %d cpus.\n",
- (td_body == sender_body) ? "Sending on" : "Receiving from",
- ifname,
+ (g.td_body == sender_body) ? "Sending on" : "Receiving from",
+ g.ifname,
devqueues,
g.nthreads,
g.cpus);
- if (td_body == sender_body) {
+ if (g.td_body == sender_body) {
fprintf(stdout, "%s -> %s (%s -> %s)\n",
- g.src_ip, g.dst_ip,
- g.src_mac, g.dst_mac);
+ g.src_ip.name, g.dst_ip.name,
+ g.src_mac.name, g.dst_mac.name);
}
/* Exit if something went wrong. */
- if (fd < 0) {
+ if (g.main_fd < 0) {
D("aborting");
usage();
}
@@ -980,155 +1482,19 @@ main(int arc, char **argv)
global_nthreads = g.nthreads;
signal(SIGINT, sigint_h);
- if (g.use_pcap) {
- g.p = pcap_open_live(ifname, 0, 1, 100, NULL);
+#if 0 // XXX this is not needed, i believe
+ if (g.dev_type > DEV_NETMAP) {
+ g.p = pcap_open_live(g.ifname, 0, 1, 100, NULL);
if (g.p == NULL) {
- D("cannot open pcap on %s", ifname);
+ D("cannot open pcap on %s", g.ifname);
usage();
} else
- D("using pcap %p on %s", g.p, ifname);
- }
-
- targs = calloc(g.nthreads, sizeof(*targs));
- /*
- * Now create the desired number of threads, each one
- * using a single descriptor.
- */
- for (i = 0; i < g.nthreads; i++) {
- struct netmap_if *tnifp;
- struct nmreq tifreq;
- int tfd;
-
- if (g.use_pcap) {
- tfd = -1;
- tnifp = NULL;
- } else {
- /* register interface. */
- tfd = open("/dev/netmap", O_RDWR);
- if (tfd == -1) {
- D("Unable to open /dev/netmap");
- continue;
- }
-
- bzero(&tifreq, sizeof(tifreq));
- strncpy(tifreq.nr_name, ifname, sizeof(tifreq.nr_name));
- tifreq.nr_version = NETMAP_API;
- tifreq.nr_ringid = (g.nthreads > 1) ? (i | NETMAP_HW_RING) : 0;
-
- /*
- * if we are acting as a receiver only, do not touch the transmit ring.
- * This is not the default because many apps may use the interface
- * in both directions, but a pure receiver does not.
- */
- if (td_body == receiver_body) {
- tifreq.nr_ringid |= NETMAP_NO_TX_POLL;
- }
-
- if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) {
- D("Unable to register %s", ifname);
- continue;
- }
- tnifp = NETMAP_IF(mmap_addr, tifreq.nr_offset);
- }
- /* start threads. */
- bzero(&targs[i], sizeof(targs[i]));
- targs[i].g = &g;
- targs[i].used = 1;
- targs[i].completed = 0;
- targs[i].fd = tfd;
- targs[i].nmr = tifreq;
- targs[i].nifp = tnifp;
- targs[i].qfirst = (g.nthreads > 1) ? i : 0;
- targs[i].qlast = (g.nthreads > 1) ? i+1 :
- (td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings);
- targs[i].me = i;
- targs[i].affinity = g.cpus ? i % g.cpus : -1;
- if (td_body == sender_body) {
- /* initialize the packet to send. */
- initialize_packet(&targs[i]);
- }
-
- if (pthread_create(&targs[i].thread, NULL, td_body,
- &targs[i]) == -1) {
- D("Unable to create thread %d", i);
- targs[i].used = 0;
- }
- }
-
- {
- uint64_t my_count = 0, prev = 0;
- uint64_t count = 0;
- double delta_t;
- struct timeval tic, toc;
-
- gettimeofday(&toc, NULL);
- for (;;) {
- struct timeval now, delta;
- uint64_t pps;
- int done = 0;
-
- delta.tv_sec = report_interval/1000;
- delta.tv_usec = (report_interval%1000)*1000;
- select(0, NULL, NULL, NULL, &delta);
- gettimeofday(&now, NULL);
- timersub(&now, &toc, &toc);
- my_count = 0;
- for (i = 0; i < g.nthreads; i++) {
- my_count += targs[i].count;
- if (targs[i].used == 0)
- done++;
- }
- pps = toc.tv_sec* 1000000 + toc.tv_usec;
- if (pps < 10000)
- continue;
- pps = ((my_count - prev) * 1000000 + pps / 2) / pps;
- D("%" PRIu64 " pps", pps);
- prev = my_count;
- toc = now;
- if (done == g.nthreads)
- break;
- }
-
- timerclear(&tic);
- timerclear(&toc);
- for (i = 0; i < g.nthreads; i++) {
- /*
- * Join active threads, unregister interfaces and close
- * file descriptors.
- */
- pthread_join(targs[i].thread, NULL);
- ioctl(targs[i].fd, NIOCUNREGIF, &targs[i].nmr);
- close(targs[i].fd);
-
- if (targs[i].completed == 0)
- continue;
-
- /*
- * Collect threads output and extract information about
- * how long it took to send all the packets.
- */
- count += targs[i].count;
- if (!timerisset(&tic) || timercmp(&targs[i].tic, &tic, <))
- tic = targs[i].tic;
- if (!timerisset(&toc) || timercmp(&targs[i].toc, &toc, >))
- toc = targs[i].toc;
+ D("using pcap %p on %s", g.p, g.ifname);
}
-
- /* print output. */
- timersub(&toc, &tic, &toc);
- delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
- if (td_body == sender_body)
- tx_output(count, g.pkt_size, delta_t);
- else
- rx_output(count, delta_t);
- }
-
- if (g.use_pcap == 0) {
- ioctl(fd, NIOCUNREGIF, &nmr);
- munmap(mmap_addr, nmr.nr_memsize);
- close(fd);
- }
-
- return (0);
+#endif // XXX
+ start_threads(&g);
+ main_thread(&g);
+ return 0;
}
+
/* end of file */
OpenPOWER on IntegriCloud