diff options
author | luigi <luigi@FreeBSD.org> | 2014-02-18 05:01:04 +0000 |
---|---|---|
committer | luigi <luigi@FreeBSD.org> | 2014-02-18 05:01:04 +0000 |
commit | 5bacc3bb87b954978543b0d82a4d5705e33f5c06 (patch) | |
tree | a79f129924ca9cf087c1e108d2d184a16ac1e42b /tools | |
parent | dd5bb071cd203986ef23e5ceecdcef3cea848542 (diff) | |
download | FreeBSD-src-5bacc3bb87b954978543b0d82a4d5705e33f5c06.zip FreeBSD-src-5bacc3bb87b954978543b0d82a4d5705e33f5c06.tar.gz |
MFH: sync the netmap code with the one in HEAD
(enhanced VALE switch, netmap pipes, emulated netmap mode).
See details in the log for svn 261909.
Diffstat (limited to 'tools')
-rw-r--r-- | tools/tools/netmap/Makefile | 28 | ||||
-rw-r--r-- | tools/tools/netmap/README | 17 | ||||
-rw-r--r-- | tools/tools/netmap/bridge.c | 203 | ||||
-rw-r--r-- | tools/tools/netmap/click-test.cfg | 19 | ||||
-rw-r--r-- | tools/tools/netmap/nm_util.c | 250 | ||||
-rw-r--r-- | tools/tools/netmap/nm_util.h | 183 | ||||
-rw-r--r-- | tools/tools/netmap/pcap.c | 654 | ||||
-rw-r--r-- | tools/tools/netmap/pkt-gen.c | 1064 | ||||
-rw-r--r-- | tools/tools/netmap/vale-ctl.c | 43 |
9 files changed, 803 insertions, 1658 deletions
diff --git a/tools/tools/netmap/Makefile b/tools/tools/netmap/Makefile index d737bac..c502473 100644 --- a/tools/tools/netmap/Makefile +++ b/tools/tools/netmap/Makefile @@ -3,26 +3,30 @@ # # For multiple programs using a single source file each, # we can just define 'progs' and create custom targets. -PROGS = pkt-gen bridge vale-ctl testpcap libnetmap.so +PROGS = pkt-gen bridge vale-ctl -CLEANFILES = $(PROGS) pcap.o nm_util.o +CLEANFILES = $(PROGS) *.o NO_MAN= -CFLAGS += -Werror -Wall -nostdinc -I/usr/include -I../../../sys +CFLAGS += -Werror -Wall # -nostdinc -I/usr/include -I../../../sys CFLAGS += -Wextra -LDFLAGS += -lpthread -lpcap +LDFLAGS += -lpthread +.ifdef WITHOUT_PCAP +CFLAGS += -DNO_PCAP +.else +LDFLAGS += -lpcap +.endif .include <bsd.prog.mk> .include <bsd.lib.mk> all: $(PROGS) -pkt-gen bridge: nm_util.o - $(CC) $(CFLAGS) -o ${.TARGET} ${.TARGET:=.c} nm_util.o $(LDFLAGS) +pkt-gen: pkt-gen.o + $(CC) $(CFLAGS) -o pkt-gen pkt-gen.o $(LDFLAGS) -testpcap: pcap.c libnetmap.so - $(CC) $(CFLAGS) -DTEST -L. -lnetmap -o ${.TARGET} pcap.c - -libnetmap.so: pcap.c nm_util.c - $(CC) $(CFLAGS) -fpic -c ${.ALLSRC} - $(CC) -shared -o ${.TARGET} ${.ALLSRC:.c=.o} +bridge: bridge.o + $(CC) $(CFLAGS) -o bridge bridge.o + +vale-ctl: vale-ctl.o + $(CC) $(CFLAGS) -o vale-ctl vale-ctl.o diff --git a/tools/tools/netmap/README b/tools/tools/netmap/README index 2bde6f2..40378e6 100644 --- a/tools/tools/netmap/README +++ b/tools/tools/netmap/README @@ -6,19 +6,4 @@ This directory contains examples that use netmap bridge a two-port jumper wire, also using the native API - testpcap a jumper wire using libnetmap (or libpcap) - - click* various click examples - ------------------------------------------------------------- -Some performance data as of may 2012 for applications using libpcap. -Throughput is generally in Mpps computed with the 64-byte frames, -using 1 core on a 2.9GHz CPU and 10Gbit/s interface - -Libpcap version -- Application --------------------- -BSD netmap ---------------------------------------------------- - 0.77 3.82 ports/trafshow (version 5) - 0.94 7.7 net-mgmt/ipcad (ip accounting daemon) - 0.9 5.0 net-mgmt/darkstat (ip accounting + graphing) - 0.83 2.45 net-mgmt/iftop (curses traffic display) + vale-ctl the program to control VALE bridges diff --git a/tools/tools/netmap/bridge.c b/tools/tools/netmap/bridge.c index 0aca44d..0895d4e 100644 --- a/tools/tools/netmap/bridge.c +++ b/tools/tools/netmap/bridge.c @@ -1,5 +1,5 @@ /* - * (C) 2011 Luigi Rizzo, Matteo Landi + * (C) 2011-2014 Luigi Rizzo, Matteo Landi * * BSD license * @@ -9,14 +9,15 @@ * $FreeBSD$ */ -#include "nm_util.h" - +#include <stdio.h> +#define NETMAP_WITH_LIBS +#include <net/netmap_user.h> +#include <sys/poll.h> int verbose = 0; -char *version = "$Id$"; - static int do_abort = 0; +static int zerocopy = 1; /* enable zerocopy if possible */ static void sigint_h(int sig) @@ -28,6 +29,26 @@ sigint_h(int sig) /* + * how many packets on this set of queues ? + */ +int +pkt_queued(struct nm_desc *d, int tx) +{ + u_int i, tot = 0; + + if (tx) { + for (i = d->first_tx_ring; i <= d->last_tx_ring; i++) { + tot += nm_ring_space(NETMAP_TXRING(d->nifp, i)); + } + } else { + for (i = d->first_rx_ring; i <= d->last_rx_ring; i++) { + tot += nm_ring_space(NETMAP_RXRING(d->nifp, i)); + } + } + return tot; +} + +/* * move up to 'limit' pkts from rxring to txring swapping buffers. */ static int @@ -42,20 +63,16 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring, msg, rxring->flags, txring->flags); j = rxring->cur; /* RX */ k = txring->cur; /* TX */ - if (rxring->avail < limit) - limit = rxring->avail; - if (txring->avail < limit) - limit = txring->avail; + m = nm_ring_space(rxring); + if (m < limit) + limit = m; + m = nm_ring_space(txring); + if (m < limit) + limit = m; m = limit; while (limit-- > 0) { struct netmap_slot *rs = &rxring->slot[j]; struct netmap_slot *ts = &txring->slot[k]; -#ifdef NO_SWAP - char *rxbuf = NETMAP_BUF(rxring, rs->buf_idx); - char *txbuf = NETMAP_BUF(txring, ts->buf_idx); -#else - uint32_t pkt; -#endif /* swap packets */ if (ts->buf_idx < 2 || rs->buf_idx < 2) { @@ -63,31 +80,31 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring, j, rs->buf_idx, k, ts->buf_idx); sleep(2); } -#ifndef NO_SWAP - pkt = ts->buf_idx; - ts->buf_idx = rs->buf_idx; - rs->buf_idx = pkt; -#endif /* copy the packet length. */ - if (rs->len < 14 || rs->len > 2048) + if (rs->len > 2048) { D("wrong len %d rx[%d] -> tx[%d]", rs->len, j, k); - else if (verbose > 1) + rs->len = 0; + } else if (verbose > 1) { D("%s send len %d rx[%d] -> tx[%d]", msg, rs->len, j, k); + } ts->len = rs->len; -#ifdef NO_SWAP - pkt_copy(rxbuf, txbuf, ts->len); -#else - /* report the buffer change. */ - ts->flags |= NS_BUF_CHANGED; - rs->flags |= NS_BUF_CHANGED; -#endif /* NO_SWAP */ - j = NETMAP_RING_NEXT(rxring, j); - k = NETMAP_RING_NEXT(txring, k); + if (zerocopy) { + uint32_t pkt = ts->buf_idx; + ts->buf_idx = rs->buf_idx; + rs->buf_idx = pkt; + /* report the buffer change. */ + ts->flags |= NS_BUF_CHANGED; + rs->flags |= NS_BUF_CHANGED; + } else { + char *rxbuf = NETMAP_BUF(rxring, rs->buf_idx); + char *txbuf = NETMAP_BUF(txring, ts->buf_idx); + nm_pkt_copy(rxbuf, txbuf, ts->len); + } + j = nm_ring_next(rxring, j); + k = nm_ring_next(txring, k); } - rxring->avail -= m; - txring->avail -= m; - rxring->cur = j; - txring->cur = k; + rxring->head = rxring->cur = j; + txring->head = txring->cur = k; if (verbose && m > 0) D("%s sent %d packets to %p", msg, m, txring); @@ -96,22 +113,22 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring, /* move packts from src to destination */ static int -move(struct my_ring *src, struct my_ring *dst, u_int limit) +move(struct nm_desc *src, struct nm_desc *dst, u_int limit) { struct netmap_ring *txring, *rxring; - u_int m = 0, si = src->begin, di = dst->begin; - const char *msg = (src->queueid & NETMAP_SW_RING) ? + u_int m = 0, si = src->first_rx_ring, di = dst->first_tx_ring; + const char *msg = (src->req.nr_ringid & NETMAP_SW_RING) ? "host->net" : "net->host"; - while (si < src->end && di < dst->end) { + while (si <= src->last_rx_ring && di <= dst->last_tx_ring) { rxring = NETMAP_RXRING(src->nifp, si); txring = NETMAP_TXRING(dst->nifp, di); ND("txring %p rxring %p", txring, rxring); - if (rxring->avail == 0) { + if (nm_ring_empty(rxring)) { si++; continue; } - if (txring->avail == 0) { + if (nm_ring_empty(txring)) { di++; continue; } @@ -121,28 +138,6 @@ move(struct my_ring *src, struct my_ring *dst, u_int limit) return (m); } -/* - * how many packets on this set of queues ? - */ -static int -pkt_queued(struct my_ring *me, int tx) -{ - u_int i, tot = 0; - - ND("me %p begin %d end %d", me, me->begin, me->end); - for (i = me->begin; i < me->end; i++) { - struct netmap_ring *ring = tx ? - NETMAP_TXRING(me->nifp, i) : NETMAP_RXRING(me->nifp, i); - tot += ring->avail; - } - if (0 && verbose && tot && !tx) - D("ring %s %s %s has %d avail at %d", - me->ifname, tx ? "tx": "rx", - me->end >= me->nifp->ni_tx_rings ? // XXX who comes first ? - "host":"net", - tot, NETMAP_TXRING(me->nifp, me->begin)->cur); - return tot; -} static void usage(void) @@ -163,17 +158,16 @@ int main(int argc, char **argv) { struct pollfd pollfd[2]; - int i, ch; + int ch; u_int burst = 1024, wait_link = 4; - struct my_ring me[2]; + struct nm_desc *pa = NULL, *pb = NULL; char *ifa = NULL, *ifb = NULL; + char ifabuf[64] = { 0 }; - fprintf(stderr, "%s %s built %s %s\n", - argv[0], version, __DATE__, __TIME__); - - bzero(me, sizeof(me)); + fprintf(stderr, "%s built %s %s\n", + argv[0], __DATE__, __TIME__); - while ( (ch = getopt(argc, argv, "b:i:vw:")) != -1) { + while ( (ch = getopt(argc, argv, "b:ci:vw:")) != -1) { switch (ch) { default: D("bad option %c %s", ch, optarg); @@ -191,6 +185,9 @@ main(int argc, char **argv) D("%s ignored, already have 2 interfaces", optarg); break; + case 'c': + zerocopy = 0; /* do not zerocopy */ + break; case 'v': verbose++; break; @@ -224,34 +221,38 @@ main(int argc, char **argv) D("invalid wait_link %d, set to 4", wait_link); wait_link = 4; } - /* setup netmap interface #1. */ - me[0].ifname = ifa; - me[1].ifname = ifb; if (!strcmp(ifa, ifb)) { D("same interface, endpoint 0 goes to host"); - i = NETMAP_SW_RING; + snprintf(ifabuf, sizeof(ifabuf) - 1, "%s^", ifa); + ifa = ifabuf; } else { /* two different interfaces. Take all rings on if1 */ - i = 0; // all hw rings } - if (netmap_open(me, i, 1)) + pa = nm_open(ifa, NULL, 0, NULL); + if (pa == NULL) { + D("cannot open %s", ifa); return (1); - me[1].mem = me[0].mem; /* copy the pointer, so only one mmap */ - if (netmap_open(me+1, 0, 1)) + } + // XXX use a single mmap ? + pb = nm_open(ifb, NULL, NM_OPEN_NO_MMAP, pa); + if (pb == NULL) { + D("cannot open %s", ifb); + nm_close(pa); return (1); + } + zerocopy = zerocopy && (pa->mem == pb->mem); + D("------- zerocopy %ssupported", zerocopy ? "" : "NOT "); /* setup poll(2) variables. */ memset(pollfd, 0, sizeof(pollfd)); - for (i = 0; i < 2; i++) { - pollfd[i].fd = me[i].fd; - pollfd[i].events = (POLLIN); - } + pollfd[0].fd = pa->fd; + pollfd[1].fd = pb->fd; D("Wait %d secs for link to come up...", wait_link); sleep(wait_link); D("Ready to go, %s 0x%x/%d <-> %s 0x%x/%d.", - me[0].ifname, me[0].queueid, me[0].nifp->ni_rx_rings, - me[1].ifname, me[1].queueid, me[1].nifp->ni_rx_rings); + pa->req.nr_name, pa->first_rx_ring, pa->req.nr_rx_rings, + pb->req.nr_name, pb->first_rx_ring, pb->req.nr_rx_rings); /* main loop */ signal(SIGINT, sigint_h); @@ -259,8 +260,8 @@ main(int argc, char **argv) int n0, n1, ret; pollfd[0].events = pollfd[1].events = 0; pollfd[0].revents = pollfd[1].revents = 0; - n0 = pkt_queued(me, 0); - n1 = pkt_queued(me + 1, 0); + n0 = pkt_queued(pa, 0); + n1 = pkt_queued(pb, 0); if (n0) pollfd[1].events |= POLLOUT; else @@ -276,39 +277,41 @@ main(int argc, char **argv) ret <= 0 ? "timeout" : "ok", pollfd[0].events, pollfd[0].revents, - pkt_queued(me, 0), - me[0].rx->cur, - pkt_queued(me, 1), + pkt_queued(pa, 0), + NETMAP_RXRING(pa->nifp, pa->cur_rx_ring)->cur, + pkt_queued(pa, 1), pollfd[1].events, pollfd[1].revents, - pkt_queued(me+1, 0), - me[1].rx->cur, - pkt_queued(me+1, 1) + pkt_queued(pb, 0), + NETMAP_RXRING(pb->nifp, pb->cur_rx_ring)->cur, + pkt_queued(pb, 1) ); if (ret < 0) continue; if (pollfd[0].revents & POLLERR) { - D("error on fd0, rxcur %d@%d", - me[0].rx->avail, me[0].rx->cur); + struct netmap_ring *rx = NETMAP_RXRING(pa->nifp, pa->cur_rx_ring); + D("error on fd0, rx [%d,%d,%d)", + rx->head, rx->cur, rx->tail); } if (pollfd[1].revents & POLLERR) { - D("error on fd1, rxcur %d@%d", - me[1].rx->avail, me[1].rx->cur); + struct netmap_ring *rx = NETMAP_RXRING(pb->nifp, pb->cur_rx_ring); + D("error on fd1, rx [%d,%d,%d)", + rx->head, rx->cur, rx->tail); } if (pollfd[0].revents & POLLOUT) { - move(me + 1, me, burst); + move(pb, pa, burst); // XXX we don't need the ioctl */ // ioctl(me[0].fd, NIOCTXSYNC, NULL); } if (pollfd[1].revents & POLLOUT) { - move(me, me + 1, burst); + move(pa, pb, burst); // XXX we don't need the ioctl */ // ioctl(me[1].fd, NIOCTXSYNC, NULL); } } D("exiting"); - netmap_close(me + 1); - netmap_close(me + 0); + nm_close(pb); + nm_close(pa); return (0); } diff --git a/tools/tools/netmap/click-test.cfg b/tools/tools/netmap/click-test.cfg deleted file mode 100644 index fc5759f..0000000 --- a/tools/tools/netmap/click-test.cfg +++ /dev/null @@ -1,19 +0,0 @@ -// -// $FreeBSD$ -// -// A sample test configuration for click -// -// -// create a switch - -myswitch :: EtherSwitch; - -// two input devices - -c0 :: FromDevice(ix0, PROMISC true); -c1 :: FromDevice(ix1, PROMISC true); - -// and now pass packets around - -c0[0] -> [0]sw[0] -> Queue(10000) -> ToDevice(ix0); -c1[0] -> [1]sw[1] -> Queue(10000) -> ToDevice(ix1); diff --git a/tools/tools/netmap/nm_util.c b/tools/tools/netmap/nm_util.c deleted file mode 100644 index 6153603..0000000 --- a/tools/tools/netmap/nm_util.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (C) 2012 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD$ - * $Id$ - * - * utilities to use netmap devices. - * This does the basic functions of opening a device and issuing - * ioctls() - */ - -#include "nm_util.h" - -extern int verbose; - -int -nm_do_ioctl(struct my_ring *me, u_long what, int subcmd) -{ - struct ifreq ifr; - int error; -#if defined( __FreeBSD__ ) || defined (__APPLE__) - int fd = me->fd; -#endif -#ifdef linux - struct ethtool_value eval; - int fd; - fd = socket(AF_INET, SOCK_DGRAM, 0); - if (fd < 0) { - printf("Error: cannot get device control socket.\n"); - return -1; - } -#endif /* linux */ - - (void)subcmd; // unused - bzero(&ifr, sizeof(ifr)); - strncpy(ifr.ifr_name, me->ifname, sizeof(ifr.ifr_name)); - switch (what) { - case SIOCSIFFLAGS: -#ifndef __APPLE__ - ifr.ifr_flagshigh = me->if_flags >> 16; -#endif - ifr.ifr_flags = me->if_flags & 0xffff; - break; - -#if defined( __FreeBSD__ ) - case SIOCSIFCAP: - ifr.ifr_reqcap = me->if_reqcap; - ifr.ifr_curcap = me->if_curcap; - break; -#endif -#ifdef linux - case SIOCETHTOOL: - eval.cmd = subcmd; - eval.data = 0; - ifr.ifr_data = (caddr_t)&eval; - break; -#endif /* linux */ - } - error = ioctl(fd, what, &ifr); - if (error) - goto done; - switch (what) { - case SIOCGIFFLAGS: -#ifndef __APPLE__ - me->if_flags = (ifr.ifr_flagshigh << 16) | - (0xffff & ifr.ifr_flags); -#endif - if (verbose) - D("flags are 0x%x", me->if_flags); - break; - -#if defined( __FreeBSD__ ) - case SIOCGIFCAP: - me->if_reqcap = ifr.ifr_reqcap; - me->if_curcap = ifr.ifr_curcap; - if (verbose) - D("curcap are 0x%x", me->if_curcap); - break; -#endif /* __FreeBSD__ */ - } -done: -#ifdef linux - close(fd); -#endif - if (error) - D("ioctl error %d %lu", error, what); - return error; -} - -/* - * open a device. if me->mem is null then do an mmap. - * Returns the file descriptor. - * The extra flag checks configures promisc mode. - */ -int -netmap_open(struct my_ring *me, int ringid, int promisc) -{ - int fd, err, l; - struct nmreq req; - - me->fd = fd = open("/dev/netmap", O_RDWR); - if (fd < 0) { - D("Unable to open /dev/netmap"); - return (-1); - } - bzero(&req, sizeof(req)); - req.nr_version = NETMAP_API; - strncpy(req.nr_name, me->ifname, sizeof(req.nr_name)); - req.nr_ringid = ringid; - err = ioctl(fd, NIOCGINFO, &req); - if (err) { - D("cannot get info on %s, errno %d ver %d", - me->ifname, errno, req.nr_version); - goto error; - } - me->memsize = l = req.nr_memsize; - if (verbose) - D("memsize is %d MB", l>>20); - err = ioctl(fd, NIOCREGIF, &req); - if (err) { - D("Unable to register %s", me->ifname); - goto error; - } - - if (me->mem == NULL) { - me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); - if (me->mem == MAP_FAILED) { - D("Unable to mmap"); - me->mem = NULL; - goto error; - } - } - - - /* Set the operating mode. */ - if (ringid != NETMAP_SW_RING) { - nm_do_ioctl(me, SIOCGIFFLAGS, 0); - if ((me[0].if_flags & IFF_UP) == 0) { - D("%s is down, bringing up...", me[0].ifname); - me[0].if_flags |= IFF_UP; - } - if (promisc) { - me[0].if_flags |= IFF_PPROMISC; - nm_do_ioctl(me, SIOCSIFFLAGS, 0); - } - -#ifdef __FreeBSD__ - /* also disable checksums etc. */ - nm_do_ioctl(me, SIOCGIFCAP, 0); - me[0].if_reqcap = me[0].if_curcap; - me[0].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE); - nm_do_ioctl(me+0, SIOCSIFCAP, 0); -#endif -#ifdef linux - /* disable: - * - generic-segmentation-offload - * - tcp-segmentation-offload - * - rx-checksumming - * - tx-checksumming - * XXX check how to set back the caps. - */ - nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_SGSO); - nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_STSO); - nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_SRXCSUM); - nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_STXCSUM); -#endif /* linux */ - } - - me->nifp = NETMAP_IF(me->mem, req.nr_offset); - me->queueid = ringid; - if (ringid & NETMAP_SW_RING) { - me->begin = req.nr_rx_rings; - me->end = me->begin + 1; - me->tx = NETMAP_TXRING(me->nifp, req.nr_tx_rings); - me->rx = NETMAP_RXRING(me->nifp, req.nr_rx_rings); - } else if (ringid & NETMAP_HW_RING) { - D("XXX check multiple threads"); - me->begin = ringid & NETMAP_RING_MASK; - me->end = me->begin + 1; - me->tx = NETMAP_TXRING(me->nifp, me->begin); - me->rx = NETMAP_RXRING(me->nifp, me->begin); - } else { - me->begin = 0; - me->end = req.nr_rx_rings; // XXX max of the two - me->tx = NETMAP_TXRING(me->nifp, 0); - me->rx = NETMAP_RXRING(me->nifp, 0); - } - return (0); -error: - close(me->fd); - return -1; -} - - -int -netmap_close(struct my_ring *me) -{ - D(""); - if (me->mem) - munmap(me->mem, me->memsize); - close(me->fd); - return (0); -} - - -/* - * how many packets on this set of queues ? - */ -int -pkt_queued(struct my_ring *me, int tx) -{ - u_int i, tot = 0; - - ND("me %p begin %d end %d", me, me->begin, me->end); - for (i = me->begin; i < me->end; i++) { - struct netmap_ring *ring = tx ? - NETMAP_TXRING(me->nifp, i) : NETMAP_RXRING(me->nifp, i); - tot += ring->avail; - } - if (0 && verbose && tot && !tx) - D("ring %s %s %s has %d avail at %d", - me->ifname, tx ? "tx": "rx", - me->end >= me->nifp->ni_tx_rings ? // XXX who comes first ? - "host":"net", - tot, NETMAP_TXRING(me->nifp, me->begin)->cur); - return tot; -} diff --git a/tools/tools/netmap/nm_util.h b/tools/tools/netmap/nm_util.h deleted file mode 100644 index 0d64f13..0000000 --- a/tools/tools/netmap/nm_util.h +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright (C) 2012 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD$ - * $Id$ - * - * Some utilities to build netmap-based programs. - */ - -#ifndef _NM_UTIL_H -#define _NM_UTIL_H -#include <errno.h> -#include <signal.h> /* signal */ -#include <stdlib.h> -#include <stdio.h> -#include <inttypes.h> /* PRI* macros */ -#include <string.h> /* strcmp */ -#include <fcntl.h> /* open */ -#include <unistd.h> /* close */ -#include <ifaddrs.h> /* getifaddrs */ - -#include <sys/mman.h> /* PROT_* */ -#include <sys/ioctl.h> /* ioctl */ -#include <sys/poll.h> -#include <sys/socket.h> /* sockaddr.. */ -#include <arpa/inet.h> /* ntohs */ -#include <sys/param.h> -#include <sys/sysctl.h> /* sysctl */ -#include <sys/time.h> /* timersub */ - -#include <net/ethernet.h> -#include <net/if.h> /* ifreq */ - -#include <netinet/in.h> -#include <netinet/ip.h> -#include <netinet/udp.h> - -#include <net/netmap.h> -#include <net/netmap_user.h> - -#ifndef MY_PCAP /* use the system's pcap if available */ - -#ifdef NO_PCAP -#define PCAP_ERRBUF_SIZE 512 -typedef void pcap_t; -struct pcap_pkthdr; -#define pcap_inject(a,b,c) ((void)a, (void)b, (void)c, -1) -#define pcap_dispatch(a, b, c, d) (void)c -#define pcap_open_live(a, b, c, d, e) ((void)e, NULL) -#else /* !NO_PCAP */ -#include <pcap/pcap.h> // XXX do we need it ? -#endif /* !NO_PCAP */ - -#endif // XXX hack - -#include <pthread.h> /* pthread_* */ - -#ifdef linux -#define ifr_flagshigh ifr_flags -#define ifr_curcap ifr_flags -#define ifr_reqcap ifr_flags -#define IFF_PPROMISC IFF_PROMISC -#include <linux/ethtool.h> -#include <linux/sockios.h> - -#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME -#include <netinet/ether.h> /* ether_aton */ -#include <linux/if_packet.h> /* sockaddr_ll */ -#endif /* linux */ - -#ifdef __FreeBSD__ -#include <sys/endian.h> /* le64toh */ -#include <machine/param.h> - -#include <pthread_np.h> /* pthread w/ affinity */ -#include <sys/cpuset.h> /* cpu_set */ -#include <net/if_dl.h> /* LLADDR */ -#endif /* __FreeBSD__ */ - -#ifdef __APPLE__ -#define ifr_flagshigh ifr_flags // XXX -#define IFF_PPROMISC IFF_PROMISC -#include <net/if_dl.h> /* LLADDR */ -#define clock_gettime(a,b) \ - do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) -#endif /* __APPLE__ */ - -static inline int min(int a, int b) { return a < b ? a : b; } -extern int time_second; - -/* debug support */ -#define ND(format, ...) do {} while(0) -#define D(format, ...) \ - fprintf(stderr, "%s [%d] " format "\n", \ - __FUNCTION__, __LINE__, ##__VA_ARGS__) - -#define RD(lps, format, ...) \ - do { \ - static int t0, cnt; \ - if (t0 != time_second) { \ - t0 = time_second; \ - cnt = 0; \ - } \ - if (cnt++ < lps) \ - D(format, ##__VA_ARGS__); \ - } while (0) - - - -// XXX does it work on 32-bit machines ? -static inline void prefetch (const void *x) -{ - __asm volatile("prefetcht0 %0" :: "m" (*(const unsigned long *)x)); -} - -// XXX only for multiples of 64 bytes, non overlapped. -static inline void -pkt_copy(const void *_src, void *_dst, int l) -{ - const uint64_t *src = _src; - uint64_t *dst = _dst; -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) - if (unlikely(l >= 1024)) { - bcopy(src, dst, l); - return; - } - for (; l > 0; l-=64) { - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - } -} - -/* - * info on a ring we handle - */ -struct my_ring { - const char *ifname; - int fd; - char *mem; /* userspace mmap address */ - u_int memsize; - u_int queueid; - u_int begin, end; /* first..last+1 rings to check */ - struct netmap_if *nifp; - struct netmap_ring *tx, *rx; /* shortcuts */ - - uint32_t if_flags; - uint32_t if_reqcap; - uint32_t if_curcap; -}; -int netmap_open(struct my_ring *me, int ringid, int promisc); -int netmap_close(struct my_ring *me); -int nm_do_ioctl(struct my_ring *me, u_long what, int subcmd); -#endif /* _NM_UTIL_H */ diff --git a/tools/tools/netmap/pcap.c b/tools/tools/netmap/pcap.c deleted file mode 100644 index f30f57b..0000000 --- a/tools/tools/netmap/pcap.c +++ /dev/null @@ -1,654 +0,0 @@ -/* - * (C) 2011-2012 Luigi Rizzo - * - * BSD license - * - * A simple library that maps some pcap functions onto netmap - * This is not 100% complete but enough to let tcpdump, trafshow - * and other apps work. - * - * $FreeBSD$ - */ - -#define MY_PCAP -#include "nm_util.h" - -char *version = "$Id$"; -int verbose = 0; - -/* - * We redefine here a number of structures that are in pcap.h - * so we can compile this file without the system header. - */ -#ifndef PCAP_ERRBUF_SIZE -#define PCAP_ERRBUF_SIZE 128 -/* - * Each packet is accompanied by a header including the timestamp, - * captured size and actual size. - */ -struct pcap_pkthdr { - struct timeval ts; /* time stamp */ - uint32_t caplen; /* length of portion present */ - uint32_t len; /* length this packet (off wire) */ -}; - -typedef struct pcap_if pcap_if_t; - -/* - * Representation of an interface address. - */ -struct pcap_addr { - struct pcap_addr *next; - struct sockaddr *addr; /* address */ - struct sockaddr *netmask; /* netmask for the above */ - struct sockaddr *broadaddr; /* broadcast addr for the above */ - struct sockaddr *dstaddr; /* P2P dest. address for the above */ -}; - -struct pcap_if { - struct pcap_if *next; - char *name; /* name to hand to "pcap_open_live()" */ - char *description; /* textual description of interface, or NULL */ - struct pcap_addr *addresses; - uint32_t flags; /* PCAP_IF_ interface flags */ -}; - -/* - * We do not support stats (yet) - */ -struct pcap_stat { - u_int ps_recv; /* number of packets received */ - u_int ps_drop; /* number of packets dropped */ - u_int ps_ifdrop; /* drops by interface XXX not yet supported */ -#ifdef WIN32 - u_int bs_capt; /* number of packets that reach the app. */ -#endif /* WIN32 */ -}; - -typedef void pcap_t; -typedef enum { - PCAP_D_INOUT = 0, - PCAP_D_IN, - PCAP_D_OUT -} pcap_direction_t; - - - -typedef void (*pcap_handler)(u_char *user, - const struct pcap_pkthdr *h, const u_char *bytes); - -char errbuf[PCAP_ERRBUF_SIZE]; - -pcap_t *pcap_open_live(const char *device, int snaplen, - int promisc, int to_ms, char *errbuf); - -int pcap_findalldevs(pcap_if_t **alldevsp, char *errbuf); -void pcap_close(pcap_t *p); -int pcap_get_selectable_fd(pcap_t *p); -int pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user); -int pcap_setnonblock(pcap_t *p, int nonblock, char *errbuf); -int pcap_setdirection(pcap_t *p, pcap_direction_t d); -char *pcap_lookupdev(char *errbuf); -int pcap_inject(pcap_t *p, const void *buf, size_t size); -int pcap_fileno(pcap_t *p); -const char *pcap_lib_version(void); - - -struct eproto { - const char *s; - u_short p; -}; -#endif /* !PCAP_ERRBUF_SIZE */ - -#ifndef TEST -/* - * build as a shared library - */ - -char pcap_version[] = "libnetmap version 0.3"; - -/* - * Our equivalent of pcap_t - */ -struct pcap_ring { - struct my_ring me; -#if 0 - const char *ifname; - - //struct nmreq nmr; - - int fd; - char *mem; /* userspace mmap address */ - u_int memsize; - u_int queueid; - u_int begin, end; /* first..last+1 rings to check */ - struct netmap_if *nifp; - - uint32_t if_flags; - uint32_t if_reqcap; - uint32_t if_curcap; -#endif - int snaplen; - char *errbuf; - int promisc; - int to_ms; - - struct pcap_pkthdr hdr; - - - struct pcap_stat st; - - char msg[PCAP_ERRBUF_SIZE]; -}; - - - -/* - * There is a set of functions that tcpdump expects even if probably - * not used - */ -struct eproto eproto_db[] = { - { "ip", ETHERTYPE_IP }, - { "arp", ETHERTYPE_ARP }, - { (char *)0, 0 } -}; - - -const char *pcap_lib_version(void) -{ - return pcap_version; -} - -int -pcap_findalldevs(pcap_if_t **alldevsp, char *errbuf) -{ - pcap_if_t *top = NULL; -#ifndef linux - struct ifaddrs *i_head, *i; - pcap_if_t *cur; - struct pcap_addr *tail = NULL; - int l; - - D("listing all devs"); - *alldevsp = NULL; - i_head = NULL; - - if (getifaddrs(&i_head)) { - D("cannot get if addresses"); - return -1; - } - for (i = i_head; i; i = i->ifa_next) { - //struct ifaddrs *ifa; - struct pcap_addr *pca; - //struct sockaddr *sa; - - D("got interface %s", i->ifa_name); - if (!top || strcmp(top->name, i->ifa_name)) { - /* new interface */ - l = sizeof(*top) + strlen(i->ifa_name) + 1; - cur = calloc(1, l); - if (cur == NULL) { - D("no space for if descriptor"); - continue; - } - cur->name = (char *)(cur + 1); - //cur->flags = i->ifa_flags; - strcpy(cur->name, i->ifa_name); - cur->description = NULL; - cur->next = top; - top = cur; - tail = NULL; - } - /* now deal with addresses */ - D("%s addr family %d len %d %s %s", - top->name, - i->ifa_addr->sa_family, i->ifa_addr->sa_len, - i->ifa_netmask ? "Netmask" : "", - i->ifa_broadaddr ? "Broadcast" : ""); - l = sizeof(struct pcap_addr) + - (i->ifa_addr ? i->ifa_addr->sa_len:0) + - (i->ifa_netmask ? i->ifa_netmask->sa_len:0) + - (i->ifa_broadaddr? i->ifa_broadaddr->sa_len:0); - pca = calloc(1, l); - if (pca == NULL) { - D("no space for if addr"); - continue; - } -#define SA_NEXT(x) ((struct sockaddr *)((char *)(x) + (x)->sa_len)) - pca->addr = (struct sockaddr *)(pca + 1); - pkt_copy(i->ifa_addr, pca->addr, i->ifa_addr->sa_len); - if (i->ifa_netmask) { - pca->netmask = SA_NEXT(pca->addr); - bcopy(i->ifa_netmask, pca->netmask, i->ifa_netmask->sa_len); - if (i->ifa_broadaddr) { - pca->broadaddr = SA_NEXT(pca->netmask); - bcopy(i->ifa_broadaddr, pca->broadaddr, i->ifa_broadaddr->sa_len); - } - } - if (tail == NULL) { - top->addresses = pca; - } else { - tail->next = pca; - } - tail = pca; - - } - freeifaddrs(i_head); -#endif /* !linux */ - (void)errbuf; /* UNUSED */ - *alldevsp = top; - return 0; -} - -void pcap_freealldevs(pcap_if_t *alldevs) -{ - (void)alldevs; /* UNUSED */ - D("unimplemented"); -} - -char * -pcap_lookupdev(char *buf) -{ - D("%s", buf); - strcpy(buf, "/dev/netmap"); - return buf; -} - -pcap_t * -pcap_create(const char *source, char *errbuf) -{ - D("src %s (call open liveted)", source); - return pcap_open_live(source, 0, 1, 100, errbuf); -} - -int -pcap_activate(pcap_t *p) -{ - D("pcap %p running", p); - return 0; -} - -int -pcap_can_set_rfmon(pcap_t *p) -{ - (void)p; /* UNUSED */ - D(""); - return 0; /* no we can't */ -} - -int -pcap_set_snaplen(pcap_t *p, int snaplen) -{ - struct pcap_ring *me = p; - - D("len %d", snaplen); - me->snaplen = snaplen; - return 0; -} - -int -pcap_snapshot(pcap_t *p) -{ - struct pcap_ring *me = p; - - D("len %d", me->snaplen); - return me->snaplen; -} - -int -pcap_lookupnet(const char *device, uint32_t *netp, - uint32_t *maskp, char *errbuf) -{ - - (void)errbuf; /* UNUSED */ - D("device %s", device); - inet_aton("10.0.0.255", (struct in_addr *)netp); - inet_aton("255.255.255.0",(struct in_addr *) maskp); - return 0; -} - -int -pcap_set_promisc(pcap_t *p, int promisc) -{ - struct pcap_ring *me = p; - - D("promisc %d", promisc); - if (nm_do_ioctl(&me->me, SIOCGIFFLAGS, 0)) - D("SIOCGIFFLAGS failed"); - if (promisc) { - me->me.if_flags |= IFF_PPROMISC; - } else { - me->me.if_flags &= ~IFF_PPROMISC; - } - if (nm_do_ioctl(&me->me, SIOCSIFFLAGS, 0)) - D("SIOCSIFFLAGS failed"); - return 0; -} - -int -pcap_set_timeout(pcap_t *p, int to_ms) -{ - struct pcap_ring *me = p; - - D("%d ms", to_ms); - me->to_ms = to_ms; - return 0; -} - -struct bpf_program; - -int -pcap_compile(pcap_t *p, struct bpf_program *fp, - const char *str, int optimize, uint32_t netmask) -{ - (void)p; /* UNUSED */ - (void)fp; /* UNUSED */ - (void)optimize; /* UNUSED */ - (void)netmask; /* UNUSED */ - D("%s", str); - return 0; -} - -int -pcap_setfilter(pcap_t *p, struct bpf_program *fp) -{ - (void)p; /* UNUSED */ - (void)fp; /* UNUSED */ - D(""); - return 0; -} - -int -pcap_datalink(pcap_t *p) -{ - (void)p; /* UNUSED */ - D("returns 1"); - return 1; // ethernet -} - -const char * -pcap_datalink_val_to_name(int dlt) -{ - D("%d returns DLT_EN10MB", dlt); - return "DLT_EN10MB"; -} - -const char * -pcap_datalink_val_to_description(int dlt) -{ - D("%d returns Ethernet link", dlt); - return "Ethernet link"; -} - -struct pcap_stat; -int -pcap_stats(pcap_t *p, struct pcap_stat *ps) -{ - struct pcap_ring *me = p; - ND(""); - - *ps = me->st; - return 0; /* accumulate from pcap_dispatch() */ -}; - -char * -pcap_geterr(pcap_t *p) -{ - struct pcap_ring *me = p; - - D(""); - return me->msg; -} - -pcap_t * -pcap_open_live(const char *device, int snaplen, - int promisc, int to_ms, char *errbuf) -{ - struct pcap_ring *me; - int l; - - (void)snaplen; /* UNUSED */ - (void)errbuf; /* UNUSED */ - if (!device) { - D("missing device name"); - return NULL; - } - - l = strlen(device) + 1; - D("request to open %s snaplen %d promisc %d timeout %dms", - device, snaplen, promisc, to_ms); - me = calloc(1, sizeof(*me) + l); - if (me == NULL) { - D("failed to allocate struct for %s", device); - return NULL; - } - me->me.ifname = (char *)(me + 1); - strcpy((char *)me->me.ifname, device); - if (netmap_open(&me->me, 0, promisc)) { - D("error opening %s", device); - free(me); - return NULL; - } - me->to_ms = to_ms; - - return (pcap_t *)me; -} - -void -pcap_close(pcap_t *p) -{ - struct my_ring *me = p; - - D(""); - if (!me) - return; - if (me->mem) - munmap(me->mem, me->memsize); - /* restore original flags ? */ - close(me->fd); - bzero(me, sizeof(*me)); - free(me); -} - -int -pcap_fileno(pcap_t *p) -{ - struct my_ring *me = p; - D("returns %d", me->fd); - return me->fd; -} - -int -pcap_get_selectable_fd(pcap_t *p) -{ - struct my_ring *me = p; - - ND(""); - return me->fd; -} - -int -pcap_setnonblock(pcap_t *p, int nonblock, char *errbuf) -{ - (void)p; /* UNUSED */ - (void)errbuf; /* UNUSED */ - D("mode is %d", nonblock); - return 0; /* ignore */ -} - -int -pcap_setdirection(pcap_t *p, pcap_direction_t d) -{ - (void)p; /* UNUSED */ - (void)d; /* UNUSED */ - D(""); - return 0; /* ignore */ -}; - -int -pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user) -{ - struct pcap_ring *pme = p; - struct my_ring *me = &pme->me; - int got = 0; - u_int si; - - ND("cnt %d", cnt); - if (cnt == 0) - cnt = -1; - /* scan all rings */ - for (si = me->begin; si < me->end; si++) { - struct netmap_ring *ring = NETMAP_RXRING(me->nifp, si); - ND("ring has %d pkts", ring->avail); - if (ring->avail == 0) - continue; - pme->hdr.ts = ring->ts; - /* - * XXX a proper prefetch should be done as - * prefetch(i); callback(i-1); ... - */ - while ((cnt == -1 || cnt != got) && ring->avail > 0) { - u_int i = ring->cur; - u_int idx = ring->slot[i].buf_idx; - if (idx < 2) { - D("%s bogus RX index %d at offset %d", - me->nifp->ni_name, idx, i); - sleep(2); - } - u_char *buf = (u_char *)NETMAP_BUF(ring, idx); - prefetch(buf); - pme->hdr.len = pme->hdr.caplen = ring->slot[i].len; - // D("call %p len %d", p, me->hdr.len); - callback(user, &pme->hdr, buf); - ring->cur = NETMAP_RING_NEXT(ring, i); - ring->avail--; - got++; - } - } - pme->st.ps_recv += got; - return got; -} - -int -pcap_inject(pcap_t *p, const void *buf, size_t size) -{ - struct my_ring *me = p; - u_int si; - - ND("cnt %d", cnt); - /* scan all rings */ - for (si = me->begin; si < me->end; si++) { - struct netmap_ring *ring = NETMAP_TXRING(me->nifp, si); - - ND("ring has %d pkts", ring->avail); - if (ring->avail == 0) - continue; - u_int i = ring->cur; - u_int idx = ring->slot[i].buf_idx; - if (idx < 2) { - D("%s bogus TX index %d at offset %d", - me->nifp->ni_name, idx, i); - sleep(2); - } - u_char *dst = (u_char *)NETMAP_BUF(ring, idx); - ring->slot[i].len = size; - pkt_copy(buf, dst, size); - ring->cur = NETMAP_RING_NEXT(ring, i); - ring->avail--; - // if (ring->avail == 0) ioctl(me->fd, NIOCTXSYNC, NULL); - return size; - } - errno = ENOBUFS; - return -1; -} - -int -pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user) -{ - struct pcap_ring *me = p; - struct pollfd fds[1]; - int i; - - ND("cnt %d", cnt); - memset(fds, 0, sizeof(fds)); - fds[0].fd = me->me.fd; - fds[0].events = (POLLIN); - - while (cnt == -1 || cnt > 0) { - if (poll(fds, 1, me->to_ms) <= 0) { - D("poll error/timeout"); - continue; - } - i = pcap_dispatch(p, cnt, callback, user); - if (cnt > 0) - cnt -= i; - } - return 0; -} - -#endif /* !TEST */ - -#ifdef TEST /* build test code */ -void do_send(u_char *user, const struct pcap_pkthdr *h, const u_char *buf) -{ - pcap_inject((pcap_t *)user, buf, h->caplen); -} - -/* - * a simple pcap test program, bridge between two interfaces. - */ -int -main(int argc, char **argv) -{ - pcap_t *p0, *p1; - int burst = 1024; - struct pollfd pollfd[2]; - - fprintf(stderr, "%s %s built %s %s\n", - argv[0], version, __DATE__, __TIME__); - - while (argc > 1 && !strcmp(argv[1], "-v")) { - verbose++; - argv++; - argc--; - } - - if (argc < 3 || argc > 4 || !strcmp(argv[1], argv[2])) { - D("Usage: %s IFNAME1 IFNAME2 [BURST]", argv[0]); - return (1); - } - if (argc > 3) - burst = atoi(argv[3]); - - p0 = pcap_open_live(argv[1], 0, 1, 100, NULL); - p1 = pcap_open_live(argv[2], 0, 1, 100, NULL); - D("%s", version); - D("open returns %p %p", p0, p1); - if (!p0 || !p1) - return(1); - bzero(pollfd, sizeof(pollfd)); - pollfd[0].fd = pcap_fileno(p0); - pollfd[1].fd = pcap_fileno(p1); - pollfd[0].events = pollfd[1].events = POLLIN; - for (;;) { - /* do i need to reset ? */ - pollfd[0].revents = pollfd[1].revents = 0; - int ret = poll(pollfd, 2, 1000); - if (ret <= 0 || verbose) - D("poll %s [0] ev %x %x [1] ev %x %x", - ret <= 0 ? "timeout" : "ok", - pollfd[0].events, - pollfd[0].revents, - pollfd[1].events, - pollfd[1].revents); - if (ret < 0) - continue; - if (pollfd[0].revents & POLLIN) - pcap_dispatch(p0, burst, do_send, p1); - if (pollfd[1].revents & POLLIN) - pcap_dispatch(p1, burst, do_send, p0); - } - - return (0); -} -#endif /* TEST */ diff --git a/tools/tools/netmap/pkt-gen.c b/tools/tools/netmap/pkt-gen.c index 901175e..8e78fa8 100644 --- a/tools/tools/netmap/pkt-gen.c +++ b/tools/tools/netmap/pkt-gen.c @@ -1,5 +1,6 @@ /* - * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved. + * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. + * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -25,7 +26,7 @@ /* * $FreeBSD$ - * $Id$ + * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ * * Example program to show how to build a multithreaded packet * source/sink using the netmap device. @@ -36,20 +37,94 @@ * */ -#include "nm_util.h" +#define _GNU_SOURCE /* for CPU_SET() */ +#include <stdio.h> +#define NETMAP_WITH_LIBS +#include <net/netmap_user.h> + #include <ctype.h> // isprint() +#include <unistd.h> // sysconf() +#include <sys/poll.h> +#include <arpa/inet.h> /* ntohs */ +#include <sys/sysctl.h> /* sysctl */ +#include <ifaddrs.h> /* getifaddrs */ +#include <net/ethernet.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/udp.h> + +#include <pthread.h> + +#ifndef NO_PCAP +#include <pcap/pcap.h> +#endif + +#ifdef linux + +#define cpuset_t cpu_set_t + +#define ifr_flagshigh ifr_flags /* only the low 16 bits here */ +#define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */ +#include <linux/ethtool.h> +#include <linux/sockios.h> + +#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME +#include <netinet/ether.h> /* ether_aton */ +#include <linux/if_packet.h> /* sockaddr_ll */ +#endif /* linux */ -const char *default_payload="netmap pkt-gen payload\n" +#ifdef __FreeBSD__ +#include <sys/endian.h> /* le64toh */ +#include <machine/param.h> + +#include <pthread_np.h> /* pthread w/ affinity */ +#include <sys/cpuset.h> /* cpu_set */ +#include <net/if_dl.h> /* LLADDR */ +#endif /* __FreeBSD__ */ + +#ifdef __APPLE__ + +#define cpuset_t uint64_t // XXX +static inline void CPU_ZERO(cpuset_t *p) +{ + *p = 0; +} + +static inline void CPU_SET(uint32_t i, cpuset_t *p) +{ + *p |= 1<< (i & 0x3f); +} + +#define pthread_setaffinity_np(a, b, c) ((void)a, 0) + +#define ifr_flagshigh ifr_flags // XXX +#define IFF_PPROMISC IFF_PROMISC +#include <net/if_dl.h> /* LLADDR */ +#define clock_gettime(a,b) \ + do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) +#endif /* __APPLE__ */ + +const char *default_payload="netmap pkt-gen DIRECT payload\n" "http://info.iet.unipi.it/~luigi/netmap/ "; -int time_second; // support for RD() debugging macro +const char *indirect_payload="netmap pkt-gen indirect payload\n" + "http://info.iet.unipi.it/~luigi/netmap/ "; int verbose = 0; -#define SKIP_PAYLOAD 1 /* do not check payload. */ +#define SKIP_PAYLOAD 1 /* do not check payload. XXX unused */ + + +#define VIRT_HDR_1 10 /* length of a base vnet-hdr */ +#define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */ +#define VIRT_HDR_MAX VIRT_HDR_2 +struct virt_header { + uint8_t fields[VIRT_HDR_MAX]; +}; struct pkt { + struct virt_header vh; struct ether_header eh; struct ip ip; struct udphdr udp; @@ -58,8 +133,8 @@ struct pkt { struct ip_range { char *name; - struct in_addr start, end, cur; - uint16_t port0, port1, cur_p; + uint32_t start, end; /* same as struct in_addr */ + uint16_t port0, port1; }; struct mac_range { @@ -67,6 +142,8 @@ struct mac_range { struct ether_addr start, end; }; +/* ifname can be netmap:foo-xxxx */ +#define MAX_IFNAMELEN 64 /* our buffer for ifname */ /* * global arguments for all threads */ @@ -80,6 +157,7 @@ struct glob_arg { int burst; int forever; int npackets; /* total packets to send */ + int frags; /* fragments per packet */ int nthreads; int cpus; int options; /* testing */ @@ -91,18 +169,25 @@ struct glob_arg { #define OPT_INDIRECT 32 /* use indirect buffers, tx only */ #define OPT_DUMP 64 /* dump rx/tx traffic */ int dev_type; +#ifndef NO_PCAP pcap_t *p; +#endif int tx_rate; struct timespec tx_period; int affinity; int main_fd; - int report_interval; + struct nm_desc *nmd; + uint64_t nmd_flags; + int report_interval; /* milliseconds between prints */ void *(*td_body)(void *); void *mmap_addr; - int mmap_size; - char *ifname; + char ifname[MAX_IFNAMELEN]; + char *nmr_config; + int dummy_send; + int virt_header; /* send also the virt_header */ + int extra_bufs; /* goes in nr_arg3 */ }; enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP }; @@ -117,9 +202,7 @@ struct targ { int completed; int cancel; int fd; - struct nmreq nmr; - struct netmap_if *nifp; - uint16_t qfirst, qlast; /* range of queues to scan */ + struct nm_desc *nmd; volatile uint64_t count; struct timespec tic, toc; int me; @@ -137,51 +220,67 @@ struct targ { static void extract_ip_range(struct ip_range *r) { - char *p_lo, *p_hi; - char buf1[16]; // one ip address - - D("extract IP range from %s", r->name); - p_lo = index(r->name, ':'); /* do we have ports ? */ - if (p_lo) { - D(" found ports at %s", p_lo); - *p_lo++ = '\0'; - p_hi = index(p_lo, '-'); - if (p_hi) - *p_hi++ = '\0'; - else - p_hi = p_lo; - r->port0 = strtol(p_lo, NULL, 0); - r->port1 = strtol(p_hi, NULL, 0); - if (r->port1 < r->port0) { - r->cur_p = r->port0; - r->port0 = r->port1; - r->port1 = r->cur_p; + char *ap, *pp; + struct in_addr a; + + if (verbose) + D("extract IP range from %s", r->name); + r->port0 = r->port1 = 0; + r->start = r->end = 0; + + /* the first - splits start/end of range */ + ap = index(r->name, '-'); /* do we have ports ? */ + if (ap) { + *ap++ = '\0'; + } + /* grab the initial values (mandatory) */ + pp = index(r->name, ':'); + if (pp) { + *pp++ = '\0'; + r->port0 = r->port1 = strtol(pp, NULL, 0); + }; + inet_aton(r->name, &a); + r->start = r->end = ntohl(a.s_addr); + if (ap) { + pp = index(ap, ':'); + if (pp) { + *pp++ = '\0'; + if (*pp) + r->port1 = strtol(pp, NULL, 0); + } + if (*ap) { + inet_aton(ap, &a); + r->end = ntohl(a.s_addr); } - r->cur_p = r->port0; - D("ports are %d to %d", r->port0, r->port1); } - p_hi = index(r->name, '-'); /* do we have upper ip ? */ - if (p_hi) { - *p_hi++ = '\0'; - } else - p_hi = r->name; - inet_aton(r->name, &r->start); - inet_aton(p_hi, &r->end); - if (r->start.s_addr > r->end.s_addr) { - r->cur = r->start; + if (r->port0 > r->port1) { + uint16_t tmp = r->port0; + r->port0 = r->port1; + r->port1 = tmp; + } + if (r->start > r->end) { + uint32_t tmp = r->start; r->start = r->end; - r->end = r->cur; + r->end = tmp; + } + { + struct in_addr a; + char buf1[16]; // one ip address + + a.s_addr = htonl(r->end); + strncpy(buf1, inet_ntoa(a), sizeof(buf1)); + a.s_addr = htonl(r->start); + if (1) + D("range is %s:%d to %s:%d", + inet_ntoa(a), r->port0, buf1, r->port1); } - r->cur = r->start; - strncpy(buf1, inet_ntoa(r->end), sizeof(buf1)); - D("range is %s %d to %s %d", inet_ntoa(r->start), r->port0, - buf1, r->port1); } static void extract_mac_range(struct mac_range *r) { - D("extract MAC range from %s", r->name); + if (verbose) + D("extract MAC range from %s", r->name); bcopy(ether_aton(r->name), &r->start, 6); bcopy(ether_aton(r->name), &r->end, 6); #if 0 @@ -196,7 +295,8 @@ extract_mac_range(struct mac_range *r) if (p) targ->dst_mac_range = atoi(p+1); #endif - D("%s starts at %s", r->name, ether_ntoa(&r->start)); + if (verbose) + D("%s starts at %s", r->name, ether_ntoa(&r->start)); } static struct targ *targs; @@ -219,19 +319,17 @@ sigint_h(int sig) static int system_ncpus(void) { -#ifdef __FreeBSD__ - int mib[2], ncpus; - size_t len; - - mib[0] = CTL_HW; - mib[1] = HW_NCPU; - len = sizeof(mib); + int ncpus; +#if defined (__FreeBSD__) + int mib[2] = { CTL_HW, HW_NCPU }; + size_t len = sizeof(mib); sysctl(mib, 2, &ncpus, &len, NULL, 0); - +#elif defined(linux) + ncpus = sysconf(_SC_NPROCESSORS_ONLN); +#else /* others */ + ncpus = 1; +#endif /* others */ return (ncpus); -#else - return 1; -#endif /* !__FreeBSD__ */ } #ifdef __linux__ @@ -256,6 +354,58 @@ system_ncpus(void) /* + * parse the vale configuration in conf and put it in nmr. + * Return the flag set if necessary. + * The configuration may consist of 0 to 4 numbers separated + * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. + * Missing numbers or zeroes stand for default values. + * As an additional convenience, if exactly one number + * is specified, then this is assigned to both #tx-slots and #rx-slots. + * If there is no 4th number, then the 3rd is assigned to both #tx-rings + * and #rx-rings. + */ +int +parse_nmr_config(const char* conf, struct nmreq *nmr) +{ + char *w, *tok; + int i, v; + + nmr->nr_tx_rings = nmr->nr_rx_rings = 0; + nmr->nr_tx_slots = nmr->nr_rx_slots = 0; + if (conf == NULL || ! *conf) + return 0; + w = strdup(conf); + for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { + v = atoi(tok); + switch (i) { + case 0: + nmr->nr_tx_slots = nmr->nr_rx_slots = v; + break; + case 1: + nmr->nr_rx_slots = v; + break; + case 2: + nmr->nr_tx_rings = nmr->nr_rx_rings = v; + break; + case 3: + nmr->nr_rx_rings = v; + break; + default: + D("ignored config: %s", tok); + break; + } + } + D("txr %d txd %d rxr %d rxd %d", + nmr->nr_tx_rings, nmr->nr_tx_slots, + nmr->nr_rx_rings, nmr->nr_rx_slots); + free(w); + return (nmr->nr_tx_rings || nmr->nr_tx_slots || + nmr->nr_rx_rings || nmr->nr_rx_slots) ? + NM_OPEN_RING_CFG : 0; +} + + +/* * locate the src mac address for our interface, put it * into the user-supplied buffer. return 0 if ok, -1 on error. */ @@ -296,7 +446,6 @@ source_hwaddr(const char *ifname, char *buf) static int setaffinity(pthread_t me, int i) { -#ifdef __FreeBSD__ cpuset_t cpumask; if (i == -1) @@ -307,13 +456,9 @@ setaffinity(pthread_t me, int i) CPU_SET(i, &cpumask); if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { - D("Unable to set affinity"); + D("Unable to set affinity: %s", strerror(errno)); return 1; } -#else - (void)me; /* suppress 'unused' warnings */ - (void)i; -#endif /* __FreeBSD__ */ return 0; } @@ -360,8 +505,10 @@ dump_payload(char *p, int len, struct netmap_ring *ring, int cur) int i, j, i0; /* get the length in ASCII of the length of the packet. */ - - printf("ring %p cur %5d len %5d buf %p\n", ring, cur, len, p); + + printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", + ring, cur, ring->slot[cur].buf_idx, + ring->slot[cur].flags, len); /* hexdump routine */ for (i = 0; i < len; ) { memset(buf, sizeof(buf), ' '); @@ -389,6 +536,56 @@ dump_payload(char *p, int len, struct netmap_ring *ring, int cur) #define uh_sum check #endif /* linux */ +/* + * increment the addressed in the packet, + * starting from the least significant field. + * DST_IP DST_PORT SRC_IP SRC_PORT + */ +static void +update_addresses(struct pkt *pkt, struct glob_arg *g) +{ + uint32_t a; + uint16_t p; + struct ip *ip = &pkt->ip; + struct udphdr *udp = &pkt->udp; + + do { + p = ntohs(udp->uh_sport); + if (p < g->src_ip.port1) { /* just inc, no wrap */ + udp->uh_sport = htons(p + 1); + break; + } + udp->uh_sport = htons(g->src_ip.port0); + + a = ntohl(ip->ip_src.s_addr); + if (a < g->src_ip.end) { /* just inc, no wrap */ + ip->ip_src.s_addr = htonl(a + 1); + break; + } + ip->ip_src.s_addr = htonl(g->src_ip.start); + + udp->uh_sport = htons(g->src_ip.port0); + p = ntohs(udp->uh_dport); + if (p < g->dst_ip.port1) { /* just inc, no wrap */ + udp->uh_dport = htons(p + 1); + break; + } + udp->uh_dport = htons(g->dst_ip.port0); + + a = ntohl(ip->ip_dst.s_addr); + if (a < g->dst_ip.end) { /* just inc, no wrap */ + ip->ip_dst.s_addr = htonl(a + 1); + break; + } + ip->ip_dst.s_addr = htonl(g->dst_ip.start); + } while (0); + // update checksum +} + +/* + * initialize one packet and prepare for the next one. + * The copy could be done better instead of repeating it each time. + */ static void initialize_packet(struct targ *targ) { @@ -398,17 +595,19 @@ initialize_packet(struct targ *targ) struct udphdr *udp; uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip); const char *payload = targ->g->options & OPT_INDIRECT ? - "XXXXXXXXXXXXXXXXXXXXXX" : default_payload; - int i, l, l0 = strlen(payload); - - for (i = 0; i < paylen;) { - l = min(l0, paylen - i); - bcopy(payload, pkt->body + i, l); - i += l; + indirect_payload : default_payload; + int i, l0 = strlen(payload); + + /* create a nice NUL-terminated string */ + for (i = 0; i < paylen; i += l0) { + if (l0 > paylen - i) + l0 = paylen - i; // last round + bcopy(payload, pkt->body + i, l0); } pkt->body[i-1] = '\0'; ip = &pkt->ip; + /* prepare the headers */ ip->ip_v = IPVERSION; ip->ip_hl = 5; ip->ip_id = 0; @@ -418,22 +617,14 @@ initialize_packet(struct targ *targ) ip->ip_off = htons(IP_DF); /* Don't fragment */ ip->ip_ttl = IPDEFTTL; ip->ip_p = IPPROTO_UDP; - ip->ip_dst.s_addr = targ->g->dst_ip.cur.s_addr; - if (++targ->g->dst_ip.cur.s_addr > targ->g->dst_ip.end.s_addr) - targ->g->dst_ip.cur.s_addr = targ->g->dst_ip.start.s_addr; - ip->ip_src.s_addr = targ->g->src_ip.cur.s_addr; - if (++targ->g->src_ip.cur.s_addr > targ->g->src_ip.end.s_addr) - targ->g->src_ip.cur.s_addr = targ->g->src_ip.start.s_addr; + ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start); + ip->ip_src.s_addr = htonl(targ->g->src_ip.start); ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0)); udp = &pkt->udp; - udp->uh_sport = htons(targ->g->src_ip.cur_p); - if (++targ->g->src_ip.cur_p > targ->g->src_ip.port1) - targ->g->src_ip.cur_p = targ->g->src_ip.port0; - udp->uh_dport = htons(targ->g->dst_ip.cur_p); - if (++targ->g->dst_ip.cur_p > targ->g->dst_ip.port1) - targ->g->dst_ip.cur_p = targ->g->dst_ip.port0; + udp->uh_sport = htons(targ->g->src_ip.port0); + udp->uh_dport = htons(targ->g->dst_ip.port0); udp->uh_ulen = htons(paylen); /* Magic: taken from sbin/dhclient/packet.c */ udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp), @@ -449,6 +640,8 @@ initialize_packet(struct targ *targ) bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); eh->ether_type = htons(ETHERTYPE_IP); + + bzero(&pkt->vh, sizeof(pkt->vh)); // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); } @@ -460,49 +653,65 @@ initialize_packet(struct targ *targ) * an interrupt when done. */ static int -send_packets(struct netmap_ring *ring, struct pkt *pkt, - int size, u_int count, int options) +send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame, + int size, struct glob_arg *g, u_int count, int options, + u_int nfrags) { - u_int sent, cur = ring->cur; - - if (ring->avail < count) - count = ring->avail; - + u_int n, sent, cur = ring->cur; + u_int fcnt; + + n = nm_ring_space(ring); + if (n < count) + count = n; + if (count < nfrags) { + D("truncating packet, no room for frags %d %d", + count, nfrags); + } #if 0 if (options & (OPT_COPY | OPT_PREFETCH) ) { for (sent = 0; sent < count; sent++) { struct netmap_slot *slot = &ring->slot[cur]; char *p = NETMAP_BUF(ring, slot->buf_idx); - prefetch(p); - cur = NETMAP_RING_NEXT(ring, cur); + __builtin_prefetch(p); + cur = nm_ring_next(ring, cur); } cur = ring->cur; } #endif - for (sent = 0; sent < count; sent++) { + for (fcnt = nfrags, sent = 0; sent < count; sent++) { struct netmap_slot *slot = &ring->slot[cur]; char *p = NETMAP_BUF(ring, slot->buf_idx); slot->flags = 0; - if (options & OPT_DUMP) - dump_payload(p, size, ring, cur); if (options & OPT_INDIRECT) { slot->flags |= NS_INDIRECT; - *((struct pkt **)(void *)p) = pkt; - } else if (options & OPT_COPY) - pkt_copy(pkt, p, size); - else if (options & OPT_MEMCPY) - memcpy(p, pkt, size); - else if (options & OPT_PREFETCH) - prefetch(p); + slot->ptr = (uint64_t)frame; + } else if (options & OPT_COPY) { + nm_pkt_copy(frame, p, size); + if (fcnt == nfrags) + update_addresses(pkt, g); + } else if (options & OPT_MEMCPY) { + memcpy(p, frame, size); + if (fcnt == nfrags) + update_addresses(pkt, g); + } else if (options & OPT_PREFETCH) { + __builtin_prefetch(p); + } + if (options & OPT_DUMP) + dump_payload(p, size, ring, cur); slot->len = size; - if (sent == count - 1) + if (--fcnt > 0) + slot->flags |= NS_MOREFRAG; + else + fcnt = nfrags; + if (sent == count - 1) { + slot->flags &= ~NS_MOREFRAG; slot->flags |= NS_REPORT; - cur = NETMAP_RING_NEXT(ring, cur); + } + cur = nm_ring_next(ring, cur); } - ring->avail -= sent; - ring->cur = cur; + ring->head = ring->cur = cur; return (sent); } @@ -518,52 +727,58 @@ static void * pinger_body(void *data) { struct targ *targ = (struct targ *) data; - struct pollfd fds[1]; - struct netmap_if *nifp = targ->nifp; + struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; + struct netmap_if *nifp = targ->nmd->nifp; int i, rx = 0, n = targ->g->npackets; - - fds[0].fd = targ->fd; - fds[0].events = (POLLIN); - static uint32_t sent; + void *frame; + int size; + uint32_t sent = 0; struct timespec ts, now, last_print; uint32_t count = 0, min = 1000000000, av = 0; + frame = &targ->pkt; + frame += sizeof(targ->pkt.vh) - targ->g->virt_header; + size = targ->g->pkt_size + targ->g->virt_header; + + if (targ->g->nthreads > 1) { D("can only ping with 1 thread"); return NULL; } clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); + now = last_print; while (n == 0 || (int)sent < n) { struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); struct netmap_slot *slot; char *p; - for (i = 0; i < 1; i++) { + for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */ slot = &ring->slot[ring->cur]; - slot->len = targ->g->pkt_size; + slot->len = size; p = NETMAP_BUF(ring, slot->buf_idx); - if (ring->avail == 0) { + if (nm_ring_empty(ring)) { D("-- ouch, cannot send"); } else { - pkt_copy(&targ->pkt, p, targ->g->pkt_size); + nm_pkt_copy(frame, p, size); clock_gettime(CLOCK_REALTIME_PRECISE, &ts); bcopy(&sent, p+42, sizeof(sent)); bcopy(&ts, p+46, sizeof(ts)); sent++; - ring->cur = NETMAP_RING_NEXT(ring, ring->cur); - ring->avail--; + ring->head = ring->cur = nm_ring_next(ring, ring->cur); } } /* should use a parameter to decide how often to send */ - if (poll(fds, 1, 3000) <= 0) { - D("poll error/timeout on queue %d", targ->me); + if (poll(&pfd, 1, 3000) <= 0) { + D("poll error/timeout on queue %d: %s", targ->me, + strerror(errno)); continue; } /* see what we got back */ - for (i = targ->qfirst; i < targ->qlast; i++) { + for (i = targ->nmd->first_tx_ring; + i <= targ->nmd->last_tx_ring; i++) { ring = NETMAP_RXRING(nifp, i); - while (ring->avail > 0) { + while (!nm_ring_empty(ring)) { uint32_t seq; slot = &ring->slot[ring->cur]; p = NETMAP_BUF(ring, slot->buf_idx); @@ -583,8 +798,7 @@ pinger_body(void *data) min = ts.tv_nsec; count ++; av += ts.tv_nsec; - ring->avail--; - ring->cur = NETMAP_RING_NEXT(ring, ring->cur); + ring->head = ring->cur = nm_ring_next(ring, ring->cur); rx++; } } @@ -616,12 +830,10 @@ static void * ponger_body(void *data) { struct targ *targ = (struct targ *) data; - struct pollfd fds[1]; - struct netmap_if *nifp = targ->nifp; + struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; + struct netmap_if *nifp = targ->nmd->nifp; struct netmap_ring *txring, *rxring; int i, rx = 0, sent = 0, n = targ->g->npackets; - fds[0].fd = targ->fd; - fds[0].events = (POLLIN); if (targ->g->nthreads > 1) { D("can only reply ping with 1 thread"); @@ -632,28 +844,28 @@ ponger_body(void *data) uint32_t txcur, txavail; //#define BUSYWAIT #ifdef BUSYWAIT - ioctl(fds[0].fd, NIOCRXSYNC, NULL); + ioctl(pfd.fd, NIOCRXSYNC, NULL); #else - if (poll(fds, 1, 1000) <= 0) { - D("poll error/timeout on queue %d", targ->me); + if (poll(&pfd, 1, 1000) <= 0) { + D("poll error/timeout on queue %d: %s", targ->me, + strerror(errno)); continue; } #endif txring = NETMAP_TXRING(nifp, 0); txcur = txring->cur; - txavail = txring->avail; + txavail = nm_ring_space(txring); /* see what we got back */ - for (i = targ->qfirst; i < targ->qlast; i++) { + for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { rxring = NETMAP_RXRING(nifp, i); - while (rxring->avail > 0) { + while (!nm_ring_empty(rxring)) { uint16_t *spkt, *dpkt; uint32_t cur = rxring->cur; struct netmap_slot *slot = &rxring->slot[cur]; char *src, *dst; src = NETMAP_BUF(rxring, slot->buf_idx); //D("got pkt %p of size %d", src, slot->len); - rxring->avail--; - rxring->cur = NETMAP_RING_NEXT(rxring, cur); + rxring->head = rxring->cur = nm_ring_next(rxring, cur); rx++; if (txavail == 0) continue; @@ -662,7 +874,7 @@ ponger_body(void *data) /* copy... */ dpkt = (uint16_t *)dst; spkt = (uint16_t *)src; - pkt_copy(src, dst, slot->len); + nm_pkt_copy(src, dst, slot->len); dpkt[0] = spkt[3]; dpkt[1] = spkt[4]; dpkt[2] = spkt[5]; @@ -671,16 +883,15 @@ ponger_body(void *data) dpkt[5] = spkt[2]; txring->slot[txcur].len = slot->len; /* XXX swap src dst mac */ - txcur = NETMAP_RING_NEXT(txring, txcur); + txcur = nm_ring_next(txring, txcur); txavail--; sent++; } } - txring->cur = txcur; - txring->avail = txavail; + txring->head = txring->cur = txcur; targ->count = sent; #ifdef BUSYWAIT - ioctl(fds[0].fd, NIOCTXSYNC, NULL); + ioctl(pfd.fd, NIOCTXSYNC, NULL); #endif //D("tx %d rx %d", sent, rx); } @@ -721,124 +932,129 @@ timespec2val(const struct timespec *a) } -static int -wait_time(struct timespec ts, struct timespec *wakeup_ts, long long *waited) +static __inline struct timespec +timespec_add(struct timespec a, struct timespec b) { - struct timespec curtime; - - curtime.tv_sec = 0; - curtime.tv_nsec = 0; - - if (clock_gettime(CLOCK_REALTIME_PRECISE, &curtime) == -1) { - D("clock_gettime: %s", strerror(errno)); - return (-1); - } - while (timespec_ge(&ts, &curtime)) { - if (waited != NULL) - (*waited)++; - if (clock_gettime(CLOCK_REALTIME_PRECISE, &curtime) == -1) { - D("clock_gettime"); - return (-1); - } + struct timespec ret = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec }; + if (ret.tv_nsec >= 1000000000) { + ret.tv_sec++; + ret.tv_nsec -= 1000000000; } - if (wakeup_ts != NULL) - *wakeup_ts = curtime; - return (0); + return ret; } -static __inline void -timespec_add(struct timespec *tsa, struct timespec *tsb) +static __inline struct timespec +timespec_sub(struct timespec a, struct timespec b) { - tsa->tv_sec += tsb->tv_sec; - tsa->tv_nsec += tsb->tv_nsec; - if (tsa->tv_nsec >= 1000000000) { - tsa->tv_sec++; - tsa->tv_nsec -= 1000000000; + struct timespec ret = { a.tv_sec - b.tv_sec, a.tv_nsec - b.tv_nsec }; + if (ret.tv_nsec < 0) { + ret.tv_sec--; + ret.tv_nsec += 1000000000; } + return ret; } +/* + * wait until ts, either busy or sleeping if more than 1ms. + * Return wakeup time. + */ +static struct timespec +wait_time(struct timespec ts) +{ + for (;;) { + struct timespec w, cur; + clock_gettime(CLOCK_REALTIME_PRECISE, &cur); + w = timespec_sub(ts, cur); + if (w.tv_sec < 0) + return cur; + else if (w.tv_sec > 0 || w.tv_nsec > 1000000) + poll(NULL, 0, 1); + } +} + static void * sender_body(void *data) { struct targ *targ = (struct targ *) data; - - struct pollfd fds[1]; - struct netmap_if *nifp = targ->nifp; + struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; + struct netmap_if *nifp = targ->nmd->nifp; struct netmap_ring *txring; - int i, n = targ->g->npackets / targ->g->nthreads, sent = 0; + int i, n = targ->g->npackets / targ->g->nthreads; + int64_t sent = 0; int options = targ->g->options | OPT_COPY; - struct timespec tmptime, nexttime = { 0, 0}; // XXX silence compiler + struct timespec nexttime = { 0, 0}; // XXX silence compiler int rate_limit = targ->g->tx_rate; - long long waited = 0; + struct pkt *pkt = &targ->pkt; + void *frame; + int size; + + frame = pkt; + frame += sizeof(pkt->vh) - targ->g->virt_header; + size = targ->g->pkt_size + targ->g->virt_header; D("start"); if (setaffinity(targ->thread, targ->affinity)) goto quit; - /* setup poll(2) mechanism. */ - memset(fds, 0, sizeof(fds)); - fds[0].fd = targ->fd; - fds[0].events = (POLLOUT); /* main loop.*/ clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); if (rate_limit) { - tmptime.tv_sec = 2; - tmptime.tv_nsec = 0; - timespec_add(&targ->tic, &tmptime); + targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); targ->tic.tv_nsec = 0; - if (wait_time(targ->tic, NULL, NULL) == -1) { - D("wait_time: %s", strerror(errno)); - goto quit; - } + wait_time(targ->tic); nexttime = targ->tic; } - if (targ->g->dev_type == DEV_PCAP) { - int size = targ->g->pkt_size; - void *pkt = &targ->pkt; - pcap_t *p = targ->g->p; + if (targ->g->dev_type == DEV_TAP) { + D("writing to file desc %d", targ->g->main_fd); for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { - if (pcap_inject(p, pkt, size) != -1) + if (write(targ->g->main_fd, frame, size) != -1) sent++; + update_addresses(pkt, targ->g); if (i > 10000) { targ->count = sent; i = 0; } } - } else if (targ->g->dev_type == DEV_TAP) { /* tap */ - int size = targ->g->pkt_size; - void *pkt = &targ->pkt; - D("writing to file desc %d", targ->g->main_fd); +#ifndef NO_PCAP + } else if (targ->g->dev_type == DEV_PCAP) { + pcap_t *p = targ->g->p; for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { - if (write(targ->g->main_fd, pkt, size) != -1) + if (pcap_inject(p, frame, size) != -1) sent++; + update_addresses(pkt, targ->g); if (i > 10000) { targ->count = sent; i = 0; } } +#endif /* NO_PCAP */ } else { int tosend = 0; + int frags = targ->g->frags; + while (!targ->cancel && (n == 0 || sent < n)) { if (rate_limit && tosend <= 0) { tosend = targ->g->burst; - timespec_add(&nexttime, &targ->g->tx_period); - if (wait_time(nexttime, &tmptime, &waited) == -1) { - D("wait_time"); - goto quit; - } + nexttime = timespec_add(nexttime, targ->g->tx_period); + wait_time(nexttime); } /* * wait for available room in the send queue(s) */ - if (poll(fds, 1, 2000) <= 0) { + if (poll(&pfd, 1, 2000) <= 0) { if (targ->cancel) break; - D("poll error/timeout on queue %d", targ->me); + D("poll error/timeout on queue %d: %s", targ->me, + strerror(errno)); + // goto quit; + } + if (pfd.revents & POLLERR) { + D("poll error"); goto quit; } /* @@ -848,32 +1064,41 @@ sender_body(void *data) D("drop copy"); options &= ~OPT_COPY; } - for (i = targ->qfirst; i < targ->qlast; i++) { + for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { int m, limit = rate_limit ? tosend : targ->g->burst; if (n > 0 && n - sent < limit) limit = n - sent; txring = NETMAP_TXRING(nifp, i); - if (txring->avail == 0) + if (nm_ring_empty(txring)) continue; - m = send_packets(txring, &targ->pkt, targ->g->pkt_size, - limit, options); + if (frags > 1) + limit = ((limit + frags - 1) / frags) * frags; + + m = send_packets(txring, pkt, frame, size, targ->g, + limit, options, frags); + ND("limit %d tail %d frags %d m %d", + limit, txring->tail, frags, m); sent += m; - tosend -= m; targ->count = sent; + if (rate_limit) { + tosend -= m; + if (tosend <= 0) + break; + } } } /* flush any remaining packets */ - ioctl(fds[0].fd, NIOCTXSYNC, NULL); + ioctl(pfd.fd, NIOCTXSYNC, NULL); /* final part: wait all the TX queues to be empty. */ - for (i = targ->qfirst; i < targ->qlast; i++) { + for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { txring = NETMAP_TXRING(nifp, i); - while (!NETMAP_TX_RING_EMPTY(txring)) { - ioctl(fds[0].fd, NIOCTXSYNC, NULL); + while (nm_tx_pending(txring)) { + ioctl(pfd.fd, NIOCTXSYNC, NULL); usleep(1); /* wait 1 tick */ } } - } + } /* end DEV_NETMAP */ clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); targ->completed = 1; @@ -887,6 +1112,7 @@ quit: } +#ifndef NO_PCAP static void receive_pcap(u_char *user, const struct pcap_pkthdr * h, const u_char * bytes) @@ -896,27 +1122,27 @@ receive_pcap(u_char *user, const struct pcap_pkthdr * h, (void)bytes; /* UNUSED */ (*count)++; } +#endif /* !NO_PCAP */ static int receive_packets(struct netmap_ring *ring, u_int limit, int dump) { - u_int cur, rx; + u_int cur, rx, n; cur = ring->cur; - if (ring->avail < limit) - limit = ring->avail; + n = nm_ring_space(ring); + if (n < limit) + limit = n; for (rx = 0; rx < limit; rx++) { struct netmap_slot *slot = &ring->slot[cur]; char *p = NETMAP_BUF(ring, slot->buf_idx); - slot->flags = OPT_INDIRECT; // XXX if (dump) dump_payload(p, slot->len, ring, cur); - cur = NETMAP_RING_NEXT(ring, cur); + cur = nm_ring_next(ring, cur); } - ring->avail -= rx; - ring->cur = cur; + ring->head = ring->cur = cur; return (rx); } @@ -925,8 +1151,8 @@ static void * receiver_body(void *data) { struct targ *targ = (struct targ *) data; - struct pollfd fds[1]; - struct netmap_if *nifp = targ->nifp; + struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; + struct netmap_if *nifp = targ->nmd->nifp; struct netmap_ring *rxring; int i; uint64_t received = 0; @@ -934,27 +1160,18 @@ receiver_body(void *data) if (setaffinity(targ->thread, targ->affinity)) goto quit; - /* setup poll(2) mechanism. */ - memset(fds, 0, sizeof(fds)); - fds[0].fd = targ->fd; - fds[0].events = (POLLIN); - /* unbounded wait for the first packet. */ for (;;) { - i = poll(fds, 1, 1000); - if (i > 0 && !(fds[0].revents & POLLERR)) + i = poll(&pfd, 1, 1000); + if (i > 0 && !(pfd.revents & POLLERR)) break; - D("waiting for initial packets, poll returns %d %d", i, fds[0].revents); + RD(1, "waiting for initial packets, poll returns %d %d", + i, pfd.revents); } /* main loop, exit after 1s silence */ clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); - if (targ->g->dev_type == DEV_PCAP) { - while (!targ->cancel) { - /* XXX should we poll ? */ - pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL); - } - } else if (targ->g->dev_type == DEV_TAP) { + if (targ->g->dev_type == DEV_TAP) { D("reading from %s fd %d", targ->g->ifname, targ->g->main_fd); while (!targ->cancel) { char buf[2048]; @@ -962,34 +1179,46 @@ receiver_body(void *data) if (read(targ->g->main_fd, buf, sizeof(buf)) > 0) targ->count++; } +#ifndef NO_PCAP + } else if (targ->g->dev_type == DEV_PCAP) { + while (!targ->cancel) { + /* XXX should we poll ? */ + pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL); + } +#endif /* !NO_PCAP */ } else { int dump = targ->g->options & OPT_DUMP; while (!targ->cancel) { /* Once we started to receive packets, wait at most 1 seconds before quitting. */ - if (poll(fds, 1, 1 * 1000) <= 0 && !targ->g->forever) { + if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); targ->toc.tv_sec -= 1; /* Subtract timeout time. */ - break; + goto out; } - for (i = targ->qfirst; i < targ->qlast; i++) { + if (pfd.revents & POLLERR) { + D("poll err"); + goto quit; + } + + for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { int m; rxring = NETMAP_RXRING(nifp, i); - if (rxring->avail == 0) + if (nm_ring_empty(rxring)) continue; m = receive_packets(rxring, targ->g->burst, dump); received += m; } targ->count = received; - - // tell the card we have read the data - //ioctl(fds[0].fd, NIOCRXSYNC, NULL); } } + clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); + +out: targ->completed = 1; targ->count = received; @@ -1006,10 +1235,10 @@ quit: static const char * norm(char *buf, double val) { - char *units[] = { "", "K", "M", "G" }; + char *units[] = { "", "K", "M", "G", "T" }; u_int i; - for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *); i++) + for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++) val /= 1000; sprintf(buf, "%.2f %s", val, units[i]); return buf; @@ -1021,8 +1250,8 @@ tx_output(uint64_t sent, int size, double delta) double bw, raw_bw, pps; char b1[40], b2[80], b3[80]; - printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n", - sent, size, delta); + printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n", + (unsigned long long)sent, size, delta); if (delta == 0) delta = 1e-6; if (size < 60) /* correct for min packet size */ @@ -1043,7 +1272,8 @@ rx_output(uint64_t received, double delta) double pps; char b1[40]; - printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta); + printf("Received %llu packets, in %.2f seconds.\n", + (unsigned long long) received, delta); if (delta == 0) delta = 1e-6; @@ -1063,18 +1293,21 @@ usage(void) "\t-n count number of iterations (can be 0)\n" "\t-t pkts_to_send also forces tx mode\n" "\t-r pkts_to_receive also forces rx mode\n" - "\t-l pkts_size in bytes excluding CRC\n" - "\t-d dst-ip end with %%n to sweep n addresses\n" - "\t-s src-ip end with %%n to sweep n addresses\n" - "\t-D dst-mac end with %%n to sweep n addresses\n" - "\t-S src-mac end with %%n to sweep n addresses\n" + "\t-l pkt_size in bytes excluding CRC\n" + "\t-d dst_ip[:port[-dst_ip:port]] single or range\n" + "\t-s src_ip[:port[-src_ip:port]] single or range\n" + "\t-D dst-mac\n" + "\t-S src-mac\n" "\t-a cpu_id use setaffinity\n" "\t-b burst size testing, mostly\n" "\t-c cores cores to use\n" "\t-p threads processes/threads to use\n" "\t-T report_ms milliseconds between reports\n" - "\t-P use libpcap instead of netmap\n" + "\t-P use libpcap instead of netmap\n" "\t-w wait_for_link_time in seconds\n" + "\t-R rate in packets per second\n" + "\t-X dump payload\n" + "\t-H len add empty virtio-net-header with size 'len'\n" "", cmd); @@ -1092,65 +1325,57 @@ start_threads(struct glob_arg *g) * using a single descriptor. */ for (i = 0; i < g->nthreads; i++) { - bzero(&targs[i], sizeof(targs[i])); - targs[i].fd = -1; /* default, with pcap */ - targs[i].g = g; + struct targ *t = &targs[i]; + + bzero(t, sizeof(*t)); + t->fd = -1; /* default, with pcap */ + t->g = g; if (g->dev_type == DEV_NETMAP) { - struct nmreq tifreq; - int tfd; + struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */ - /* register interface. */ - tfd = open("/dev/netmap", O_RDWR); - if (tfd == -1) { - D("Unable to open /dev/netmap"); - continue; + if (g->nthreads > 1) { + if (nmd.req.nr_flags != NR_REG_ALL_NIC) { + D("invalid nthreads mode %d", nmd.req.nr_flags); + continue; + } + nmd.req.nr_flags = NR_REG_ONE_NIC; + nmd.req.nr_ringid = i; } - targs[i].fd = tfd; - - bzero(&tifreq, sizeof(tifreq)); - strncpy(tifreq.nr_name, g->ifname, sizeof(tifreq.nr_name)); - tifreq.nr_version = NETMAP_API; - tifreq.nr_ringid = (g->nthreads > 1) ? (i | NETMAP_HW_RING) : 0; + /* Only touch one of the rings (rx is already ok) */ + if (g->td_body == receiver_body) + nmd.req.nr_ringid |= NETMAP_NO_TX_POLL; - /* - * if we are acting as a receiver only, do not touch the transmit ring. - * This is not the default because many apps may use the interface - * in both directions, but a pure receiver does not. - */ - if (g->td_body == receiver_body) { - tifreq.nr_ringid |= NETMAP_NO_TX_POLL; - } + /* register interface. Override ifname and ringid etc. */ - if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) { - D("Unable to register %s", g->ifname); + t->nmd = nm_open(t->g->ifname, NULL, g->nmd_flags | + NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, g->nmd); + if (t->nmd == NULL) { + D("Unable to open %s: %s", + t->g->ifname, strerror(errno)); continue; } - targs[i].nmr = tifreq; - targs[i].nifp = NETMAP_IF(g->mmap_addr, tifreq.nr_offset); - /* start threads. */ - targs[i].qfirst = (g->nthreads > 1) ? i : 0; - targs[i].qlast = (g->nthreads > 1) ? i+1 : - (g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings); + t->fd = t->nmd->fd; + } else { targs[i].fd = g->main_fd; } - targs[i].used = 1; - targs[i].me = i; + t->used = 1; + t->me = i; if (g->affinity >= 0) { if (g->affinity < g->cpus) - targs[i].affinity = g->affinity; + t->affinity = g->affinity; else - targs[i].affinity = i % g->cpus; - } else - targs[i].affinity = -1; + t->affinity = i % g->cpus; + } else { + t->affinity = -1; + } /* default, init packets */ - initialize_packet(&targs[i]); + initialize_packet(t); - if (pthread_create(&targs[i].thread, NULL, g->td_body, - &targs[i]) == -1) { - D("Unable to create thread %d", i); - targs[i].used = 0; + if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) { + D("Unable to create thread %d: %s", i, strerror(errno)); + t->used = 0; } } } @@ -1175,7 +1400,6 @@ main_thread(struct glob_arg *g) delta.tv_usec = (g->report_interval%1000)*1000; select(0, NULL, NULL, NULL, &delta); gettimeofday(&now, NULL); - time_second = now.tv_sec; timersub(&now, &toc, &toc); my_count = 0; for (i = 0; i < g->nthreads; i++) { @@ -1188,8 +1412,10 @@ main_thread(struct glob_arg *g) continue; npkts = my_count - prev; pps = (npkts*1000000 + usec/2) / usec; - D("%" PRIu64 " pps (%" PRIu64 " pkts in %" PRIu64 " usec)", - pps, npkts, usec); + D("%llu pps (%llu pkts in %llu usec)", + (unsigned long long)pps, + (unsigned long long)npkts, + (unsigned long long)usec); prev = my_count; toc = now; if (done == g->nthreads) @@ -1233,7 +1459,7 @@ main_thread(struct glob_arg *g) rx_output(count, delta_t); if (g->dev_type == DEV_NETMAP) { - munmap(g->mmap_addr, g->mmap_size); + munmap(g->nmd->mem, g->nmd->req.nr_memsize); close(g->main_fd); } } @@ -1296,7 +1522,7 @@ tap_alloc(char *dev) /* try to create the device */ if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { - D("failed to to a TUNSETIFF"); + D("failed to to a TUNSETIFF: %s", strerror(errno)); close(fd); return err; } @@ -1321,7 +1547,6 @@ main(int arc, char **argv) struct glob_arg g; - struct nmreq nmr; int ch; int wait_link = 2; int devqueues = 1; /* how many device queues */ @@ -1343,9 +1568,12 @@ main(int arc, char **argv) g.cpus = 1; g.forever = 1; g.tx_rate = 0; + g.frags = 1; + g.nmr_config = ""; + g.virt_header = 0; while ( (ch = getopt(arc, argv, - "a:f:n:i:It:r:l:d:s:D:S:b:c:o:p:PT:w:WvR:X")) != -1) { + "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:")) != -1) { struct sf *fn; switch(ch) { @@ -1358,6 +1586,15 @@ main(int arc, char **argv) g.npackets = atoi(optarg); break; + case 'F': + i = atoi(optarg); + if (i < 1 || i > 63) { + D("invalid frags %d [1..63], ignore", i); + break; + } + g.frags = i; + break; + case 'f': for (fn = func; fn->key; fn++) { if (!strcmp(fn->key, optarg)) @@ -1378,29 +1615,39 @@ main(int arc, char **argv) break; case 'i': /* interface */ - g.ifname = optarg; - if (!strncmp(optarg, "tap", 3)) + /* a prefix of tap: netmap: or pcap: forces the mode. + * otherwise we guess + */ + D("interface is %s", optarg); + if (strlen(optarg) > MAX_IFNAMELEN - 8) { + D("ifname too long %s", optarg); + break; + } + strcpy(g.ifname, optarg); + if (!strcmp(optarg, "null")) { + g.dev_type = DEV_NETMAP; + g.dummy_send = 1; + } else if (!strncmp(optarg, "tap:", 4)) { g.dev_type = DEV_TAP; - else + strcpy(g.ifname, optarg + 4); + } else if (!strncmp(optarg, "pcap:", 5)) { + g.dev_type = DEV_PCAP; + strcpy(g.ifname, optarg + 5); + } else if (!strncmp(optarg, "netmap:", 7) || + !strncmp(optarg, "vale", 4)) { g.dev_type = DEV_NETMAP; + } else if (!strncmp(optarg, "tap", 3)) { + g.dev_type = DEV_TAP; + } else { /* prepend netmap: */ + g.dev_type = DEV_NETMAP; + sprintf(g.ifname, "netmap:%s", optarg); + } break; case 'I': g.options |= OPT_INDIRECT; /* XXX use indirect buffer */ break; - case 't': /* send, deprecated */ - D("-t deprecated, please use -f tx -n %s", optarg); - g.td_body = sender_body; - g.npackets = atoi(optarg); - break; - - case 'r': /* receive */ - D("-r deprecated, please use -f rx -n %s", optarg); - g.td_body = receiver_body; - g.npackets = atoi(optarg); - break; - case 'l': /* pkt_size */ g.pkt_size = atoi(optarg); break; @@ -1435,10 +1682,6 @@ main(int arc, char **argv) g.nthreads = atoi(optarg); break; - case 'P': - g.dev_type = DEV_PCAP; - break; - case 'D': /* destination mac */ g.dst_mac.name = optarg; break; @@ -1454,6 +1697,16 @@ main(int arc, char **argv) break; case 'X': g.options |= OPT_DUMP; + break; + case 'C': + g.nmr_config = strdup(optarg); + break; + case 'H': + g.virt_header = atoi(optarg); + break; + case 'e': /* extra bufs */ + g.extra_bufs = atoi(optarg); + break; } } @@ -1490,6 +1743,18 @@ main(int arc, char **argv) extract_mac_range(&g.src_mac); extract_mac_range(&g.dst_mac); + if (g.src_ip.start != g.src_ip.end || + g.src_ip.port0 != g.src_ip.port1 || + g.dst_ip.start != g.dst_ip.end || + g.dst_ip.port0 != g.dst_ip.port1) + g.options |= OPT_COPY; + + if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 + && g.virt_header != VIRT_HDR_2) { + D("bad virtio-net-header length"); + usage(); + } + if (g.dev_type == DEV_TAP) { D("want to use tap %s", g.ifname); g.main_fd = tap_alloc(g.ifname); @@ -1497,7 +1762,8 @@ main(int arc, char **argv) D("cannot open tap %s", g.ifname); usage(); } - } else if (g.dev_type > DEV_NETMAP) { +#ifndef NO_PCAP + } else if (g.dev_type == DEV_PCAP) { char pcap_errbuf[PCAP_ERRBUF_SIZE]; D("using pcap on %s", g.ifname); @@ -1507,36 +1773,37 @@ main(int arc, char **argv) D("cannot open pcap on %s", g.ifname); usage(); } +#endif /* !NO_PCAP */ + } else if (g.dummy_send) { /* but DEV_NETMAP */ + D("using a dummy send routine"); } else { - bzero(&nmr, sizeof(nmr)); - nmr.nr_version = NETMAP_API; + struct nm_desc base_nmd; + + bzero(&base_nmd, sizeof(base_nmd)); + + g.nmd_flags = 0; + g.nmd_flags |= parse_nmr_config(g.nmr_config, &base_nmd.req); + if (g.extra_bufs) { + base_nmd.req.nr_arg3 = g.extra_bufs; + g.nmd_flags |= NM_OPEN_ARG3; + } + /* - * Open the netmap device to fetch the number of queues of our - * interface. + * Open the netmap device using nm_open(). * - * The first NIOCREGIF also detaches the card from the * protocol stack and may cause a reset of the card, * which in turn may take some time for the PHY to - * reconfigure. + * reconfigure. We do the open here to have time to reset. */ - g.main_fd = open("/dev/netmap", O_RDWR); - if (g.main_fd == -1) { - D("Unable to open /dev/netmap"); - // fail later - } else { - if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) { - D("Unable to get if info without name"); - } else { - D("map size is %d Kb", nmr.nr_memsize >> 10); - } - bzero(&nmr, sizeof(nmr)); - nmr.nr_version = NETMAP_API; - strncpy(nmr.nr_name, g.ifname, sizeof(nmr.nr_name)); - if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) { - D("Unable to get if info for %s", g.ifname); - } - devqueues = nmr.nr_rx_rings; + g.nmd = nm_open(g.ifname, NULL, g.nmd_flags, &base_nmd); + if (g.nmd == NULL) { + D("Unable to open %s: %s", g.ifname, strerror(errno)); + goto out; } + g.main_fd = g.nmd->fd; + D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem); + + devqueues = g.nmd->req.nr_rx_rings; /* validate provided nthreads. */ if (g.nthreads < 1 || g.nthreads > devqueues) { @@ -1544,36 +1811,23 @@ main(int arc, char **argv) // continue, fail later } - /* - * Map the netmap shared memory: instead of issuing mmap() - * inside the body of the threads, we prefer to keep this - * operation here to simplify the thread logic. - */ - D("mapping %d Kbytes", nmr.nr_memsize>>10); - g.mmap_size = nmr.nr_memsize; - g.mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize, - PROT_WRITE | PROT_READ, - MAP_SHARED, g.main_fd, 0); - if (g.mmap_addr == MAP_FAILED) { - D("Unable to mmap %d KB", nmr.nr_memsize >> 10); - // continue, fail later - } + if (verbose) { + struct netmap_if *nifp = g.nmd->nifp; + struct nmreq *req = &g.nmd->req; - /* - * Register the interface on the netmap device: from now on, - * we can operate on the network interface without any - * interference from the legacy network stack. - * - * We decide to put the first interface registration here to - * give time to cards that take a long time to reset the PHY. - */ - nmr.nr_version = NETMAP_API; - if (ioctl(g.main_fd, NIOCREGIF, &nmr) == -1) { - D("Unable to register interface %s", g.ifname); - //continue, fail later + D("nifp at offset %d, %d tx %d rx region %d", + req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, + req->nr_arg2); + for (i = 0; i <= req->nr_tx_rings; i++) { + D(" TX%d at 0x%lx", i, + (char *)NETMAP_TXRING(nifp, i) - (char *)nifp); + } + for (i = 0; i <= req->nr_rx_rings; i++) { + D(" RX%d at 0x%lx", i, + (char *)NETMAP_RXRING(nifp, i) - (char *)nifp); + } } - /* Print some debug information. */ fprintf(stdout, "%s %s: %d queues, %d threads and %d cpus.\n", @@ -1587,7 +1841,8 @@ main(int arc, char **argv) g.src_ip.name, g.dst_ip.name, g.src_mac.name, g.dst_mac.name); } - + +out: /* Exit if something went wrong. */ if (g.main_fd < 0) { D("aborting"); @@ -1595,6 +1850,7 @@ main(int arc, char **argv) } } + if (g.options) { D("--- SPECIAL OPTIONS:%s%s%s%s%s\n", g.options & OPT_PREFETCH ? " prefetch" : "", @@ -1603,23 +1859,27 @@ main(int arc, char **argv) g.options & OPT_INDIRECT ? " indirect" : "", g.options & OPT_COPY ? " copy" : ""); } - - if (g.tx_rate == 0) { - g.tx_period.tv_sec = 0; - g.tx_period.tv_nsec = 0; - } else if (g.tx_rate == 1) { - g.tx_period.tv_sec = 1; - g.tx_period.tv_nsec = 0; - } else { - g.tx_period.tv_sec = 0; - g.tx_period.tv_nsec = (1e9 / g.tx_rate) * g.burst; - if (g.tx_period.tv_nsec > 1000000000) { - g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; - g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; - } + + g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; + if (g.tx_rate > 0) { + /* try to have at least something every second, + * reducing the burst size to some 0.01s worth of data + * (but no less than one full set of fragments) + */ + uint64_t x; + int lim = (g.tx_rate)/300; + if (g.burst > lim) + g.burst = lim; + if (g.burst < g.frags) + g.burst = g.frags; + x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate; + g.tx_period.tv_nsec = x; + g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; + g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; } - D("Sending %d packets every %d.%09d ns", - g.burst, (int)g.tx_period.tv_sec, (int)g.tx_period.tv_nsec); + if (g.td_body == sender_body) + D("Sending %d packets every %ld.%09ld s", + g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec); /* Wait for PHY reset. */ D("Wait %d secs for phy reset", wait_link); sleep(wait_link); @@ -1629,16 +1889,6 @@ main(int arc, char **argv) global_nthreads = g.nthreads; signal(SIGINT, sigint_h); -#if 0 // XXX this is not needed, i believe - if (g.dev_type > DEV_NETMAP) { - g.p = pcap_open_live(g.ifname, 0, 1, 100, NULL); - if (g.p == NULL) { - D("cannot open pcap on %s", g.ifname); - usage(); - } else - D("using pcap %p on %s", g.p, g.ifname); - } -#endif // XXX start_threads(&g); main_thread(&g); return 0; diff --git a/tools/tools/netmap/vale-ctl.c b/tools/tools/netmap/vale-ctl.c index 0a478ba..e1d8da5 100644 --- a/tools/tools/netmap/vale-ctl.c +++ b/tools/tools/netmap/vale-ctl.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 Michio Honda. All rights reserved. + * Copyright (C) 2013-2014 Michio Honda. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -33,6 +33,7 @@ #include <unistd.h> /* close */ #include <sys/ioctl.h> /* ioctl */ #include <sys/param.h> +#include <sys/socket.h> /* apple needs sockaddr */ #include <net/if.h> /* ifreq */ #include <net/netmap.h> #include <net/netmap_user.h> @@ -69,20 +70,22 @@ bdg_ctl(const char *name, int nr_cmd, int nr_arg) nr_arg = 0; nmr.nr_arg1 = nr_arg; error = ioctl(fd, NIOCREGIF, &nmr); - if (error == -1) - D("Unable to %s %s to the bridge", nr_cmd == + if (error == -1) { + ND("Unable to %s %s to the bridge", nr_cmd == NETMAP_BDG_DETACH?"detach":"attach", name); - else - D("Success to %s %s to the bridge\n", nr_cmd == + perror(name); + } else + ND("Success to %s %s to the bridge", nr_cmd == NETMAP_BDG_DETACH?"detach":"attach", name); break; case NETMAP_BDG_LIST: if (strlen(nmr.nr_name)) { /* name to bridge/port info */ error = ioctl(fd, NIOCGINFO, &nmr); - if (error) - D("Unable to obtain info for %s", name); - else + if (error) { + ND("Unable to obtain info for %s", name); + perror(name); + } else D("%s at bridge:%d port:%d", name, nmr.nr_arg1, nmr.nr_arg2); break; @@ -101,9 +104,10 @@ bdg_ctl(const char *name, int nr_cmd, int nr_arg) default: /* GINFO */ nmr.nr_cmd = nmr.nr_arg1 = nmr.nr_arg2 = 0; error = ioctl(fd, NIOCGINFO, &nmr); - if (error) - D("Unable to get if info for %s", name); - else + if (error) { + ND("Unable to get if info for %s", name); + perror(name); + } else D("%s: %d queues.", name, nmr.nr_rx_rings); break; } @@ -118,7 +122,7 @@ main(int argc, char *argv[]) const char *command = basename(argv[0]); char *name = NULL; - if (argc != 3 && argc != 1 /* list all */ ) { + if (argc > 3) { usage: fprintf(stderr, "Usage:\n" @@ -127,12 +131,13 @@ usage: "\t-d interface interface name to be detached\n" "\t-a interface interface name to be attached\n" "\t-h interface interface name to be attached with the host stack\n" - "\t-l list all or specified bridge's interfaces\n" + "\t-l list all or specified bridge's interfaces (default)\n" "", command); return 0; } - while ((ch = getopt(argc, argv, "d:a:h:g:l:")) != -1) { + while ((ch = getopt(argc, argv, "d:a:h:g:l")) != -1) { + name = optarg; /* default */ switch (ch) { default: fprintf(stderr, "bad option %c %s", ch, optarg); @@ -152,12 +157,16 @@ usage: break; case 'l': nr_cmd = NETMAP_BDG_LIST; + if (optind < argc && argv[optind][0] == '-') + name = NULL; break; } - name = optarg; + if (optind != argc) { + // fprintf(stderr, "optind %d argc %d\n", optind, argc); + goto usage; + } } if (argc == 1) nr_cmd = NETMAP_BDG_LIST; - bdg_ctl(name, nr_cmd, nr_arg); - return 0; + return bdg_ctl(name, nr_cmd, nr_arg) ? 1 : 0; } |