summary | refs | log | tree | commit | diff | stats
path: root/tools
diff options
context:
space:
mode:
author: luigi <luigi@FreeBSD.org> 2014-01-06 12:53:15 +0000
committer: luigi <luigi@FreeBSD.org> 2014-01-06 12:53:15 +0000
commit: 41068e3dad4b13c5f9eb42f4dc310e9abf19de43 (patch)
tree: 529a5d218d5f4d073c5ad30a4b484d1b412ea226 /tools
parent: b421f931ee549ea10e588f82e7cc1df4c24e2c2a (diff)
download: FreeBSD-src-41068e3dad4b13c5f9eb42f4dc310e9abf19de43.zip
download: FreeBSD-src-41068e3dad4b13c5f9eb42f4dc310e9abf19de43.tar.gz
It is 2014 and we have a new version of netmap.
Most relevant features:

- netmap emulation on any NIC, even those without native netmap support.
  On the ixgbe we have measured about 4Mpps/core/queue in this mode,
  which is still a lot more than with sockets/bpf.

- seamless interconnection of VALE switch, NICs and host stack.
  If you disable accelerations on your NIC (say em0)

      ifconfig em0 -txcsum -rxcsum

  you can use the VALE switch to connect the NIC and the host stack:

      vale-ctl -h valeXX:em0

  allowing the NIC to be shared with other netmap clients.

- THE USER API HAS SLIGHTLY CHANGED (head/cur/tail pointers instead of
  pointers/count as before). This was unavoidable in order to support,
  in the future, multiple threads operating on the same rings.
  Netmap clients require very small source code changes to compile again.
  On the plus side, the new API should be easier to understand and the
  internals are a lot simpler.

The manual page has been updated extensively to reflect the current
features and give some examples.

This is the result of the work of several people including Giuseppe Lettieri,
Vincenzo Maffione, Michio Honda and myself, and has been financially
supported by the EU projects CHANGE and OPENLAB, the NetApp University
Research Fund, NEC, and of course the Universita` di Pisa.
Diffstat (limited to 'tools')
-rw-r--r--  tools/tools/netmap/bridge.c     36
-rw-r--r--  tools/tools/netmap/nm_util.c    91
-rw-r--r--  tools/tools/netmap/nm_util.h     8
-rw-r--r--  tools/tools/netmap/pcap.c       18
-rw-r--r--  tools/tools/netmap/pkt-gen.c   297
-rw-r--r--  tools/tools/netmap/vale-ctl.c   16
6 files changed, 306 insertions, 160 deletions
diff --git a/tools/tools/netmap/bridge.c b/tools/tools/netmap/bridge.c
index 0aca44d..6dc77e4 100644
--- a/tools/tools/netmap/bridge.c
+++ b/tools/tools/netmap/bridge.c
@@ -1,5 +1,5 @@
/*
- * (C) 2011 Luigi Rizzo, Matteo Landi
+ * (C) 2011-2014 Luigi Rizzo, Matteo Landi
*
* BSD license
*
@@ -42,10 +42,12 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
msg, rxring->flags, txring->flags);
j = rxring->cur; /* RX */
k = txring->cur; /* TX */
- if (rxring->avail < limit)
- limit = rxring->avail;
- if (txring->avail < limit)
- limit = txring->avail;
+ m = nm_ring_space(rxring);
+ if (m < limit)
+ limit = m;
+ m = nm_ring_space(txring);
+ if (m < limit)
+ limit = m;
m = limit;
while (limit-- > 0) {
struct netmap_slot *rs = &rxring->slot[j];
@@ -81,13 +83,11 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
ts->flags |= NS_BUF_CHANGED;
rs->flags |= NS_BUF_CHANGED;
#endif /* NO_SWAP */
- j = NETMAP_RING_NEXT(rxring, j);
- k = NETMAP_RING_NEXT(txring, k);
+ j = nm_ring_next(rxring, j);
+ k = nm_ring_next(txring, k);
}
- rxring->avail -= m;
- txring->avail -= m;
- rxring->cur = j;
- txring->cur = k;
+ rxring->head = rxring->cur = j;
+ txring->head = txring->cur = k;
if (verbose && m > 0)
D("%s sent %d packets to %p", msg, m, txring);
@@ -107,11 +107,11 @@ move(struct my_ring *src, struct my_ring *dst, u_int limit)
rxring = NETMAP_RXRING(src->nifp, si);
txring = NETMAP_TXRING(dst->nifp, di);
ND("txring %p rxring %p", txring, rxring);
- if (rxring->avail == 0) {
+ if (nm_ring_empty(rxring)) {
si++;
continue;
}
- if (txring->avail == 0) {
+ if (nm_ring_empty(txring)) {
di++;
continue;
}
@@ -133,7 +133,7 @@ pkt_queued(struct my_ring *me, int tx)
for (i = me->begin; i < me->end; i++) {
struct netmap_ring *ring = tx ?
NETMAP_TXRING(me->nifp, i) : NETMAP_RXRING(me->nifp, i);
- tot += ring->avail;
+ tot += nm_ring_space(ring);
}
if (0 && verbose && tot && !tx)
D("ring %s %s %s has %d avail at %d",
@@ -288,12 +288,12 @@ main(int argc, char **argv)
if (ret < 0)
continue;
if (pollfd[0].revents & POLLERR) {
- D("error on fd0, rxcur %d@%d",
- me[0].rx->avail, me[0].rx->cur);
+ D("error on fd0, rx [%d,%d)",
+ me[0].rx->cur, me[0].rx->tail);
}
if (pollfd[1].revents & POLLERR) {
- D("error on fd1, rxcur %d@%d",
- me[1].rx->avail, me[1].rx->cur);
+ D("error on fd1, rx [%d,%d)",
+ me[1].rx->cur, me[1].rx->tail);
}
if (pollfd[0].revents & POLLOUT) {
move(me + 1, me, burst);
diff --git a/tools/tools/netmap/nm_util.c b/tools/tools/netmap/nm_util.c
index 195b687..1268840 100644
--- a/tools/tools/netmap/nm_util.c
+++ b/tools/tools/netmap/nm_util.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2013 Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -232,7 +232,7 @@ pkt_queued(struct my_ring *me, int tx)
for (i = me->begin; i < me->end; i++) {
struct netmap_ring *ring = tx ?
NETMAP_TXRING(me->nifp, i) : NETMAP_RXRING(me->nifp, i);
- tot += ring->avail;
+ tot += nm_ring_space(ring);
}
if (0 && verbose && tot && !tx)
D("ring %s %s %s has %d avail at %d",
@@ -242,3 +242,90 @@ pkt_queued(struct my_ring *me, int tx)
tot, NETMAP_TXRING(me->nifp, me->begin)->cur);
return tot;
}
+
+#if 0
+
+/*
+ *
+
+Helper routines for multiple readers from the same queue
+
+- all readers open the device in 'passive' mode (NETMAP_PRIV_RING set).
+ In this mode a thread that loses the race on a poll() just continues
+ without calling *xsync()
+
+- all readers share an extra 'ring' which contains the sync information.
+ In particular we have a shared head+tail pointers that work
+ together with cur and available
+ ON RETURN FROM THE SYSCALL:
+ shadow->head = ring->cur
+ shadow->tail = ring->tail
+ shadow->link[i] = i for all slots // mark invalid
+
+ */
+
+struct nm_q_arg {
+ u_int want; /* Input */
+ u_int have; /* Output, 0 on error */
+ u_int head;
+ u_int tail;
+ struct netmap_ring *ring;
+};
+
+/*
+ * grab a number of slots from the queue.
+ */
+struct nm_q_arg
+my_grab(struct nm_q_arg q)
+{
+ const u_int ns = q.ring->num_slots;
+
+ for (;;) {
+
+ q.head = (volatile u_int)q.ring->head;
+ q.have = ns + q.head - (volatile u_int)q.ring->tail;
+ if (q.have >= ns)
+ q.have -= ns;
+ if (q.have == 0) /* no space */
+ break;
+ if (q.want < q.have)
+ q.have = q.want;
+ q.tail = q.head + q.have;
+ if (q.tail >= ns)
+ q.tail -= ns;
+ if (atomic_cmpset_int(&q.ring->head, q.head, q.tail)
+ break; /* success */
+ }
+ D("returns %d out of %d at %d,%d",
+ q.have, q.want, q.head, q.tail);
+ /* the last one can clear avail ? */
+ return q;
+}
+
+
+int
+my_release(struct nm_q_arg q)
+{
+ u_int head = q.head, tail = q.tail, i;
+ struct netmap_ring *r = q.ring;
+
+ /* link the block to the next one.
+ * there is no race here because the location is mine.
+ */
+ r->slot[head].ptr = tail; /* this is mine */
+ // memory barrier
+ if (r->head != head)
+ return; /* not my turn to release */
+ for (;;) {
+ // advance head
+ r->head = head = r->slot[head].ptr;
+ // barrier ?
+ if (head == r->slot[head].ptr)
+ break; // stop here
+ }
+ /* we have advanced from q.head to head (r.head might be
+ * further down.
+ */
+ // do an ioctl/poll to flush.
+}
+#endif /* unused */
diff --git a/tools/tools/netmap/nm_util.h b/tools/tools/netmap/nm_util.h
index 0d64f13..d8f8f94 100644
--- a/tools/tools/netmap/nm_util.h
+++ b/tools/tools/netmap/nm_util.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012 Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -32,6 +32,9 @@
#ifndef _NM_UTIL_H
#define _NM_UTIL_H
+
+#define _GNU_SOURCE /* for CPU_SET() */
+
#include <errno.h>
#include <signal.h> /* signal */
#include <stdlib.h>
@@ -79,6 +82,9 @@ struct pcap_pkthdr;
#include <pthread.h> /* pthread_* */
#ifdef linux
+
+#define cpuset_t cpu_set_t
+
#define ifr_flagshigh ifr_flags
#define ifr_curcap ifr_flags
#define ifr_reqcap ifr_flags
diff --git a/tools/tools/netmap/pcap.c b/tools/tools/netmap/pcap.c
index f30f57b..dd87c4a 100644
--- a/tools/tools/netmap/pcap.c
+++ b/tools/tools/netmap/pcap.c
@@ -1,5 +1,5 @@
/*
- * (C) 2011-2012 Luigi Rizzo
+ * (C) 2011-2014 Luigi Rizzo
*
* BSD license
*
@@ -499,15 +499,14 @@ pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
/* scan all rings */
for (si = me->begin; si < me->end; si++) {
struct netmap_ring *ring = NETMAP_RXRING(me->nifp, si);
- ND("ring has %d pkts", ring->avail);
- if (ring->avail == 0)
+ if (nm_ring_empty(ring))
continue;
pme->hdr.ts = ring->ts;
/*
* XXX a proper prefetch should be done as
* prefetch(i); callback(i-1); ...
*/
- while ((cnt == -1 || cnt != got) && ring->avail > 0) {
+ while ((cnt == -1 || cnt != got) && !nm_ring_empty(ring)) {
u_int i = ring->cur;
u_int idx = ring->slot[i].buf_idx;
if (idx < 2) {
@@ -520,8 +519,7 @@ pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
pme->hdr.len = pme->hdr.caplen = ring->slot[i].len;
// D("call %p len %d", p, me->hdr.len);
callback(user, &pme->hdr, buf);
- ring->cur = NETMAP_RING_NEXT(ring, i);
- ring->avail--;
+ ring->head = ring->cur = nm_ring_next(ring, i);
got++;
}
}
@@ -540,8 +538,7 @@ pcap_inject(pcap_t *p, const void *buf, size_t size)
for (si = me->begin; si < me->end; si++) {
struct netmap_ring *ring = NETMAP_TXRING(me->nifp, si);
- ND("ring has %d pkts", ring->avail);
- if (ring->avail == 0)
+ if (nm_ring_empty(ring))
continue;
u_int i = ring->cur;
u_int idx = ring->slot[i].buf_idx;
@@ -553,9 +550,8 @@ pcap_inject(pcap_t *p, const void *buf, size_t size)
u_char *dst = (u_char *)NETMAP_BUF(ring, idx);
ring->slot[i].len = size;
pkt_copy(buf, dst, size);
- ring->cur = NETMAP_RING_NEXT(ring, i);
- ring->avail--;
- // if (ring->avail == 0) ioctl(me->fd, NIOCTXSYNC, NULL);
+ ring->head = ring->cur = nm_ring_next(ring, i);
+ // if (ring->cur == ring->tail) ioctl(me->fd, NIOCTXSYNC, NULL);
return size;
}
errno = ENOBUFS;
diff --git a/tools/tools/netmap/pkt-gen.c b/tools/tools/netmap/pkt-gen.c
index a6e5eeb..c1d0840 100644
--- a/tools/tools/netmap/pkt-gen.c
+++ b/tools/tools/netmap/pkt-gen.c
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -52,7 +53,16 @@ int verbose = 0;
#define SKIP_PAYLOAD 1 /* do not check payload. */
+
+#define VIRT_HDR_1 10 /* length of a base vnet-hdr */
+#define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */
+#define VIRT_HDR_MAX VIRT_HDR_2
+struct virt_header {
+ uint8_t fields[VIRT_HDR_MAX];
+};
+
struct pkt {
+ struct virt_header vh;
struct ether_header eh;
struct ip ip;
struct udphdr udp;
@@ -109,6 +119,8 @@ struct glob_arg {
char *ifname;
char *nmr_config;
int dummy_send;
+ int virt_header; /* send also the virt_header */
+ int host_ring;
};
enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };
@@ -146,7 +158,8 @@ extract_ip_range(struct ip_range *r)
char *ap, *pp;
struct in_addr a;
- D("extract IP range from %s", r->name);
+ if (verbose)
+ D("extract IP range from %s", r->name);
r->port0 = r->port1 = 0;
r->start = r->end = 0;
@@ -192,7 +205,8 @@ extract_ip_range(struct ip_range *r)
a.s_addr = htonl(r->end);
strncpy(buf1, inet_ntoa(a), sizeof(buf1));
a.s_addr = htonl(r->start);
- D("range is %s:%d to %s:%d",
+ if (1)
+ D("range is %s:%d to %s:%d",
inet_ntoa(a), r->port0, buf1, r->port1);
}
}
@@ -200,7 +214,8 @@ extract_ip_range(struct ip_range *r)
static void
extract_mac_range(struct mac_range *r)
{
- D("extract MAC range from %s", r->name);
+ if (verbose)
+ D("extract MAC range from %s", r->name);
bcopy(ether_aton(r->name), &r->start, 6);
bcopy(ether_aton(r->name), &r->end, 6);
#if 0
@@ -215,7 +230,8 @@ extract_mac_range(struct mac_range *r)
if (p)
targ->dst_mac_range = atoi(p+1);
#endif
- D("%s starts at %s", r->name, ether_ntoa(&r->start));
+ if (verbose)
+ D("%s starts at %s", r->name, ether_ntoa(&r->start));
}
static struct targ *targs;
@@ -281,7 +297,7 @@ system_ncpus(void)
* Missing numbers or zeroes stand for default values.
* As an additional convenience, if exactly one number
* is specified, then this is assigned to both #tx-slots and #rx-slots.
- * If there is no 4th number, then the 3rd is assigned to both #tx-rings
+ * If there is no 4th number, then the 3rd is assigned to both #tx-rings
* and #rx-rings.
*/
void parse_nmr_config(const char* conf, struct nmreq *nmr)
@@ -362,7 +378,7 @@ source_hwaddr(const char *ifname, char *buf)
static int
setaffinity(pthread_t me, int i)
{
-#ifdef __FreeBSD__
+#if 1 // def __FreeBSD__
cpuset_t cpumask;
if (i == -1)
@@ -373,7 +389,7 @@ setaffinity(pthread_t me, int i)
CPU_SET(i, &cpumask);
if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) {
- D("Unable to set affinity");
+ D("Unable to set affinity: %s", strerror(errno));
return 1;
}
#else
@@ -559,6 +575,8 @@ initialize_packet(struct targ *targ)
bcopy(&targ->g->src_mac.start, eh->ether_shost, 6);
bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6);
eh->ether_type = htons(ETHERTYPE_IP);
+
+ bzero(&pkt->vh, sizeof(pkt->vh));
// dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0);
}
@@ -570,18 +588,19 @@ initialize_packet(struct targ *targ)
* an interrupt when done.
*/
static int
-send_packets(struct netmap_ring *ring, struct pkt *pkt,
- struct glob_arg *g, u_int count, int options, u_int nfrags)
+send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame,
+ int size, struct glob_arg *g, u_int count, int options,
+ u_int nfrags)
{
- u_int sent, cur = ring->cur;
+ u_int n, sent, cur = ring->cur;
int fcnt;
- int size = g->pkt_size;
- if (ring->avail < count)
- count = ring->avail;
+ n = nm_ring_space(ring);
+ if (n < count)
+ count = n;
if (count < nfrags) {
D("truncating packet, no room for frags %d %d",
- count, nfrags);
+ count, nfrags);
}
#if 0
if (options & (OPT_COPY | OPT_PREFETCH) ) {
@@ -590,7 +609,7 @@ send_packets(struct netmap_ring *ring, struct pkt *pkt,
char *p = NETMAP_BUF(ring, slot->buf_idx);
prefetch(p);
- cur = NETMAP_RING_NEXT(ring, cur);
+ cur = nm_ring_next(ring, cur);
}
cur = ring->cur;
}
@@ -602,13 +621,13 @@ send_packets(struct netmap_ring *ring, struct pkt *pkt,
slot->flags = 0;
if (options & OPT_INDIRECT) {
slot->flags |= NS_INDIRECT;
- slot->ptr = (uint64_t)pkt;
+ slot->ptr = (uint64_t)frame;
} else if (options & OPT_COPY) {
- pkt_copy(pkt, p, size);
+ pkt_copy(frame, p, size);
if (fcnt == 1)
update_addresses(pkt, g);
} else if (options & OPT_MEMCPY) {
- memcpy(p, pkt, size);
+ memcpy(p, frame, size);
if (fcnt == 1)
update_addresses(pkt, g);
} else if (options & OPT_PREFETCH) {
@@ -625,10 +644,9 @@ send_packets(struct netmap_ring *ring, struct pkt *pkt,
slot->flags &= ~NS_MOREFRAG;
slot->flags |= NS_REPORT;
}
- cur = NETMAP_RING_NEXT(ring, cur);
+ cur = nm_ring_next(ring, cur);
}
- ring->avail -= sent;
- ring->cur = cur;
+ ring->head = ring->cur = cur;
return (sent);
}
@@ -647,6 +665,12 @@ pinger_body(void *data)
struct pollfd fds[1];
struct netmap_if *nifp = targ->nifp;
int i, rx = 0, n = targ->g->npackets;
+ void *frame;
+ int size;
+
+ frame = &targ->pkt;
+ frame += sizeof(targ->pkt.vh) - targ->g->virt_header;
+ size = targ->g->pkt_size + targ->g->virt_header;
fds[0].fd = targ->fd;
fds[0].events = (POLLIN);
@@ -660,36 +684,37 @@ pinger_body(void *data)
}
clock_gettime(CLOCK_REALTIME_PRECISE, &last_print);
+ now = last_print;
while (n == 0 || (int)sent < n) {
struct netmap_ring *ring = NETMAP_TXRING(nifp, 0);
struct netmap_slot *slot;
char *p;
- for (i = 0; i < 1; i++) {
+ for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */
slot = &ring->slot[ring->cur];
- slot->len = targ->g->pkt_size;
+ slot->len = size;
p = NETMAP_BUF(ring, slot->buf_idx);
- if (ring->avail == 0) {
+ if (nm_ring_empty(ring)) {
D("-- ouch, cannot send");
} else {
- pkt_copy(&targ->pkt, p, targ->g->pkt_size);
+ pkt_copy(frame, p, size);
clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
bcopy(&sent, p+42, sizeof(sent));
bcopy(&ts, p+46, sizeof(ts));
sent++;
- ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
- ring->avail--;
+ ring->head = ring->cur = nm_ring_next(ring, ring->cur);
}
}
/* should use a parameter to decide how often to send */
if (poll(fds, 1, 3000) <= 0) {
- D("poll error/timeout on queue %d", targ->me);
+ D("poll error/timeout on queue %d: %s", targ->me,
+ strerror(errno));
continue;
}
/* see what we got back */
for (i = targ->qfirst; i < targ->qlast; i++) {
ring = NETMAP_RXRING(nifp, i);
- while (ring->avail > 0) {
+ while (!nm_ring_empty(ring)) {
uint32_t seq;
slot = &ring->slot[ring->cur];
p = NETMAP_BUF(ring, slot->buf_idx);
@@ -709,8 +734,7 @@ pinger_body(void *data)
min = ts.tv_nsec;
count ++;
av += ts.tv_nsec;
- ring->avail--;
- ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
+ ring->head = ring->cur = nm_ring_next(ring, ring->cur);
rx++;
}
}
@@ -761,25 +785,25 @@ ponger_body(void *data)
ioctl(fds[0].fd, NIOCRXSYNC, NULL);
#else
if (poll(fds, 1, 1000) <= 0) {
- D("poll error/timeout on queue %d", targ->me);
+ D("poll error/timeout on queue %d: %s", targ->me,
+ strerror(errno));
continue;
}
#endif
txring = NETMAP_TXRING(nifp, 0);
txcur = txring->cur;
- txavail = txring->avail;
+ txavail = nm_ring_space(txring);
/* see what we got back */
for (i = targ->qfirst; i < targ->qlast; i++) {
rxring = NETMAP_RXRING(nifp, i);
- while (rxring->avail > 0) {
+ while (!nm_ring_empty(rxring)) {
uint16_t *spkt, *dpkt;
uint32_t cur = rxring->cur;
struct netmap_slot *slot = &rxring->slot[cur];
char *src, *dst;
src = NETMAP_BUF(rxring, slot->buf_idx);
//D("got pkt %p of size %d", src, slot->len);
- rxring->avail--;
- rxring->cur = NETMAP_RING_NEXT(rxring, cur);
+ rxring->head = rxring->cur = nm_ring_next(rxring, cur);
rx++;
if (txavail == 0)
continue;
@@ -797,13 +821,12 @@ ponger_body(void *data)
dpkt[5] = spkt[2];
txring->slot[txcur].len = slot->len;
/* XXX swap src dst mac */
- txcur = NETMAP_RING_NEXT(txring, txcur);
+ txcur = nm_ring_next(txring, txcur);
txavail--;
sent++;
}
}
- txring->cur = txcur;
- txring->avail = txavail;
+ txring->head = txring->cur = txcur;
targ->count = sent;
#ifdef BUSYWAIT
ioctl(fds[0].fd, NIOCTXSYNC, NULL);
@@ -847,43 +870,47 @@ timespec2val(const struct timespec *a)
}
-static int
-wait_time(struct timespec ts, struct timespec *wakeup_ts, long long *waited)
+static __inline struct timespec
+timespec_add(struct timespec a, struct timespec b)
{
- struct timespec curtime;
-
- curtime.tv_sec = 0;
- curtime.tv_nsec = 0;
-
- if (clock_gettime(CLOCK_REALTIME_PRECISE, &curtime) == -1) {
- D("clock_gettime: %s", strerror(errno));
- return (-1);
- }
- while (timespec_ge(&ts, &curtime)) {
- if (waited != NULL)
- (*waited)++;
- if (clock_gettime(CLOCK_REALTIME_PRECISE, &curtime) == -1) {
- D("clock_gettime");
- return (-1);
- }
+ struct timespec ret = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec };
+ if (ret.tv_nsec >= 1000000000) {
+ ret.tv_sec++;
+ ret.tv_nsec -= 1000000000;
}
- if (wakeup_ts != NULL)
- *wakeup_ts = curtime;
- return (0);
+ return ret;
}
-static __inline void
-timespec_add(struct timespec *tsa, struct timespec *tsb)
+static __inline struct timespec
+timespec_sub(struct timespec a, struct timespec b)
{
- tsa->tv_sec += tsb->tv_sec;
- tsa->tv_nsec += tsb->tv_nsec;
- if (tsa->tv_nsec >= 1000000000) {
- tsa->tv_sec++;
- tsa->tv_nsec -= 1000000000;
+ struct timespec ret = { a.tv_sec - b.tv_sec, a.tv_nsec - b.tv_nsec };
+ if (ret.tv_nsec < 0) {
+ ret.tv_sec--;
+ ret.tv_nsec += 1000000000;
}
+ return ret;
}
+/*
+ * wait until ts, either busy or sleeping if more than 1ms.
+ * Return wakeup time.
+ */
+static struct timespec
+wait_time(struct timespec ts)
+{
+ for (;;) {
+ struct timespec w, cur;
+ clock_gettime(CLOCK_REALTIME_PRECISE, &cur);
+ w = timespec_sub(ts, cur);
+ if (w.tv_sec < 0)
+ return cur;
+ else if (w.tv_sec > 0 || w.tv_nsec > 1000000)
+ poll(NULL, 0, 1);
+ }
+}
+
static void *
sender_body(void *data)
{
@@ -894,9 +921,15 @@ sender_body(void *data)
struct netmap_ring *txring;
int i, n = targ->g->npackets / targ->g->nthreads, sent = 0;
int options = targ->g->options | OPT_COPY;
- struct timespec tmptime, nexttime = { 0, 0}; // XXX silence compiler
+ struct timespec nexttime = { 0, 0}; // XXX silence compiler
int rate_limit = targ->g->tx_rate;
- long long waited = 0;
+ struct pkt *pkt = &targ->pkt;
+ void *frame;
+ int size;
+
+ frame = pkt;
+ frame += sizeof(pkt->vh) - targ->g->virt_header;
+ size = targ->g->pkt_size + targ->g->virt_header;
D("start");
if (setaffinity(targ->thread, targ->affinity))
@@ -909,23 +942,16 @@ sender_body(void *data)
/* main loop.*/
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
if (rate_limit) {
- tmptime.tv_sec = 2;
- tmptime.tv_nsec = 0;
- timespec_add(&targ->tic, &tmptime);
+ targ->tic = timespec_add(targ->tic, (struct timespec){2,0});
targ->tic.tv_nsec = 0;
- if (wait_time(targ->tic, NULL, NULL) == -1) {
- D("wait_time: %s", strerror(errno));
- goto quit;
- }
+ wait_time(targ->tic);
nexttime = targ->tic;
}
if (targ->g->dev_type == DEV_PCAP) {
- int size = targ->g->pkt_size;
- void *pkt = &targ->pkt;
pcap_t *p = targ->g->p;
for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
- if (pcap_inject(p, pkt, size) != -1)
+ if (pcap_inject(p, frame, size) != -1)
sent++;
update_addresses(pkt, targ->g);
if (i > 10000) {
@@ -934,12 +960,10 @@ sender_body(void *data)
}
}
} else if (targ->g->dev_type == DEV_TAP) { /* tap */
- int size = targ->g->pkt_size;
- void *pkt = &targ->pkt;
D("writing to file desc %d", targ->g->main_fd);
for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
- if (write(targ->g->main_fd, pkt, size) != -1)
+ if (write(targ->g->main_fd, frame, size) != -1)
sent++;
update_addresses(pkt, targ->g);
if (i > 10000) {
@@ -955,11 +979,8 @@ sender_body(void *data)
if (rate_limit && tosend <= 0) {
tosend = targ->g->burst;
- timespec_add(&nexttime, &targ->g->tx_period);
- if (wait_time(nexttime, &tmptime, &waited) == -1) {
- D("wait_time");
- goto quit;
- }
+ nexttime = timespec_add(nexttime, targ->g->tx_period);
+ wait_time(nexttime);
}
/*
@@ -968,7 +989,12 @@ sender_body(void *data)
if (poll(fds, 1, 2000) <= 0) {
if (targ->cancel)
break;
- D("poll error/timeout on queue %d", targ->me);
+ D("poll error/timeout on queue %d: %s", targ->me,
+ strerror(errno));
+ goto quit;
+ }
+ if (fds[0].revents & POLLERR) {
+ D("poll error");
goto quit;
}
/*
@@ -983,12 +1009,12 @@ sender_body(void *data)
if (n > 0 && n - sent < limit)
limit = n - sent;
txring = NETMAP_TXRING(nifp, i);
- if (txring->avail == 0)
+ if (nm_ring_empty(txring))
continue;
if (frags > 1)
limit = ((limit + frags - 1) / frags) * frags;
- m = send_packets(txring, &targ->pkt, targ->g,
+ m = send_packets(txring, pkt, frame, size, targ->g,
limit, options, frags);
ND("limit %d avail %d frags %d m %d",
limit, txring->avail, frags, m);
@@ -1007,7 +1033,7 @@ sender_body(void *data)
/* final part: wait all the TX queues to be empty. */
for (i = targ->qfirst; i < targ->qlast; i++) {
txring = NETMAP_TXRING(nifp, i);
- while (!NETMAP_TX_RING_EMPTY(txring)) {
+ while (nm_tx_pending(txring)) {
ioctl(fds[0].fd, NIOCTXSYNC, NULL);
usleep(1); /* wait 1 tick */
}
@@ -1039,11 +1065,12 @@ receive_pcap(u_char *user, const struct pcap_pkthdr * h,
static int
receive_packets(struct netmap_ring *ring, u_int limit, int dump)
{
- u_int cur, rx;
+ u_int cur, rx, n;
cur = ring->cur;
- if (ring->avail < limit)
- limit = ring->avail;
+ n = nm_ring_space(ring);
+ if (n < limit)
+ limit = n;
for (rx = 0; rx < limit; rx++) {
struct netmap_slot *slot = &ring->slot[cur];
char *p = NETMAP_BUF(ring, slot->buf_idx);
@@ -1051,10 +1078,9 @@ receive_packets(struct netmap_ring *ring, u_int limit, int dump)
if (dump)
dump_payload(p, slot->len, ring, cur);
- cur = NETMAP_RING_NEXT(ring, cur);
+ cur = nm_ring_next(ring, cur);
}
- ring->avail -= rx;
- ring->cur = cur;
+ ring->head = ring->cur = cur;
return (rx);
}
@@ -1082,7 +1108,7 @@ receiver_body(void *data)
i = poll(fds, 1, 1000);
if (i > 0 && !(fds[0].revents & POLLERR))
break;
- D("waiting for initial packets, poll returns %d %d", i, fds[0].revents);
+ RD(1, "waiting for initial packets, poll returns %d %d", i, fds[0].revents);
}
/* main loop, exit after 1s silence */
@@ -1111,11 +1137,16 @@ receiver_body(void *data)
break;
}
+ if (fds[0].revents & POLLERR) {
+ D("poll err");
+ goto quit;
+ }
+
for (i = targ->qfirst; i < targ->qlast; i++) {
int m;
rxring = NETMAP_RXRING(nifp, i);
- if (rxring->avail == 0)
+ if (nm_ring_empty(rxring))
continue;
m = receive_packets(rxring, targ->g->burst, dump);
@@ -1215,6 +1246,8 @@ usage(void)
"\t-w wait_for_link_time in seconds\n"
"\t-R rate in packets per second\n"
"\t-X dump payload\n"
+ "\t-H len add empty virtio-net-header with size 'len'\n"
+ "\t-h use host ring\n"
"",
cmd);
@@ -1243,7 +1276,7 @@ start_threads(struct glob_arg *g)
/* register interface. */
tfd = open("/dev/netmap", O_RDWR);
if (tfd == -1) {
- D("Unable to open /dev/netmap");
+ D("Unable to open /dev/netmap: %s", strerror(errno));
continue;
}
targs[i].fd = tfd;
@@ -1251,7 +1284,11 @@ start_threads(struct glob_arg *g)
bzero(&tifreq, sizeof(tifreq));
strncpy(tifreq.nr_name, g->ifname, sizeof(tifreq.nr_name));
tifreq.nr_version = NETMAP_API;
- tifreq.nr_ringid = (g->nthreads > 1) ? (i | NETMAP_HW_RING) : 0;
+ if (g->host_ring) {
+ tifreq.nr_ringid = NETMAP_SW_RING;
+ } else {
+ tifreq.nr_ringid = (g->nthreads > 1) ? (i | NETMAP_HW_RING) : 0;
+ }
parse_nmr_config(g->nmr_config, &tifreq);
/*
@@ -1264,7 +1301,7 @@ start_threads(struct glob_arg *g)
}
if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) {
- D("Unable to register %s", g->ifname);
+ D("Unable to register %s: %s", g->ifname, strerror(errno));
continue;
}
D("memsize is %d MB", tifreq.nr_memsize >> 20);
@@ -1272,9 +1309,14 @@ start_threads(struct glob_arg *g)
targs[i].nifp = NETMAP_IF(g->mmap_addr, tifreq.nr_offset);
D("nifp flags 0x%x", targs[i].nifp->ni_flags);
/* start threads. */
- targs[i].qfirst = (g->nthreads > 1) ? i : 0;
- targs[i].qlast = (g->nthreads > 1) ? i+1 :
- (g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings);
+ if (g->host_ring) {
+ targs[i].qfirst = (g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings);
+ targs[i].qlast = targs[i].qfirst + 1;
+ } else {
+ targs[i].qfirst = (g->nthreads > 1) ? i : 0;
+ targs[i].qlast = (g->nthreads > 1) ? i+1 :
+ (g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings);
+ }
} else {
targs[i].fd = g->main_fd;
}
@@ -1292,7 +1334,7 @@ start_threads(struct glob_arg *g)
if (pthread_create(&targs[i].thread, NULL, g->td_body,
&targs[i]) == -1) {
- D("Unable to create thread %d", i);
+ D("Unable to create thread %d: %s", i, strerror(errno));
targs[i].used = 0;
}
}
@@ -1439,7 +1481,7 @@ tap_alloc(char *dev)
/* try to create the device */
if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) {
- D("failed to to a TUNSETIFF");
+ D("failed to to a TUNSETIFF: %s", strerror(errno));
close(fd);
return err;
}
@@ -1488,9 +1530,10 @@ main(int arc, char **argv)
g.tx_rate = 0;
g.frags = 1;
g.nmr_config = "";
+ g.virt_header = 0;
while ( (ch = getopt(arc, argv,
- "a:f:F:n:i:It:r:l:d:s:D:S:b:c:o:p:PT:w:WvR:XC:")) != -1) {
+ "a:f:F:n:i:It:r:l:d:s:D:S:b:c:o:p:PT:w:WvR:XC:H:h")) != -1) {
struct sf *fn;
switch(ch) {
@@ -1613,6 +1656,11 @@ main(int arc, char **argv)
break;
case 'C':
g.nmr_config = strdup(optarg);
+ break;
+ case 'H':
+ g.virt_header = atoi(optarg);
+ case 'h':
+ g.host_ring = 1;
}
}
@@ -1649,6 +1697,12 @@ main(int arc, char **argv)
extract_mac_range(&g.src_mac);
extract_mac_range(&g.dst_mac);
+ if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1
+ && g.virt_header != VIRT_HDR_2) {
+ D("bad virtio-net-header length");
+ usage();
+ }
+
if (g.dev_type == DEV_TAP) {
D("want to use tap %s", g.ifname);
g.main_fd = tap_alloc(g.ifname);
@@ -1682,7 +1736,7 @@ main(int arc, char **argv)
*/
g.main_fd = open("/dev/netmap", O_RDWR);
if (g.main_fd == -1) {
- D("Unable to open /dev/netmap");
+ D("Unable to open /dev/netmap: %s", strerror(errno));
// fail later
}
/*
@@ -1696,22 +1750,16 @@ main(int arc, char **argv)
bzero(&nmr, sizeof(nmr));
nmr.nr_version = NETMAP_API;
strncpy(nmr.nr_name, g.ifname, sizeof(nmr.nr_name));
- nmr.nr_version = NETMAP_API;
parse_nmr_config(g.nmr_config, &nmr);
if (ioctl(g.main_fd, NIOCREGIF, &nmr) == -1) {
- D("Unable to register interface %s", g.ifname);
+ D("Unable to register interface %s: %s", g.ifname, strerror(errno));
//continue, fail later
}
ND("%s: txr %d txd %d rxr %d rxd %d", g.ifname,
nmr.nr_tx_rings, nmr.nr_tx_slots,
nmr.nr_rx_rings, nmr.nr_rx_slots);
- //if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) {
- // D("Unable to get if info without name");
- //} else {
- // D("map size is %d Kb", nmr.nr_memsize >> 10);
- //}
if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) {
- D("Unable to get if info for %s", g.ifname);
+ D("Unable to get if info for %s: %s", g.ifname, strerror(errno));
}
devqueues = nmr.nr_rx_rings;
@@ -1732,7 +1780,7 @@ main(int arc, char **argv)
PROT_WRITE | PROT_READ,
MAP_SHARED, g.main_fd, 0);
if (g.mmap_addr == MAP_FAILED) {
- D("Unable to mmap %d KB", nmr.nr_memsize >> 10);
+ D("Unable to mmap %d KB: %s", nmr.nr_memsize >> 10, strerror(errno));
// continue, fail later
}
@@ -1772,14 +1820,17 @@ main(int arc, char **argv)
g.tx_period.tv_sec = g.tx_period.tv_nsec = 0;
if (g.tx_rate > 0) {
/* try to have at least something every second,
- * reducing the burst size to 0.5s worth of data
+ * reducing the burst size to some 0.01s worth of data
* (but no less than one full set of fragments)
*/
- if (g.burst > g.tx_rate/2)
- g.burst = g.tx_rate/2;
+ uint64_t x;
+ int lim = (g.tx_rate)/300;
+ if (g.burst > lim)
+ g.burst = lim;
if (g.burst < g.frags)
g.burst = g.frags;
- g.tx_period.tv_nsec = (1e9 / g.tx_rate) * g.burst;
+ x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate;
+ g.tx_period.tv_nsec = x;
g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000;
g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000;
}
diff --git a/tools/tools/netmap/vale-ctl.c b/tools/tools/netmap/vale-ctl.c
index 0a478ba..c0cf574 100644
--- a/tools/tools/netmap/vale-ctl.c
+++ b/tools/tools/netmap/vale-ctl.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 Michio Honda. All rights reserved.
+ * Copyright (C) 2013-2014 Michio Honda. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -118,7 +118,7 @@ main(int argc, char *argv[])
const char *command = basename(argv[0]);
char *name = NULL;
- if (argc != 3 && argc != 1 /* list all */ ) {
+ if (argc > 3) {
usage:
fprintf(stderr,
"Usage:\n"
@@ -127,12 +127,13 @@ usage:
"\t-d interface interface name to be detached\n"
"\t-a interface interface name to be attached\n"
"\t-h interface interface name to be attached with the host stack\n"
- "\t-l list all or specified bridge's interfaces\n"
+ "\t-l list all or specified bridge's interfaces (default)\n"
"", command);
return 0;
}
- while ((ch = getopt(argc, argv, "d:a:h:g:l:")) != -1) {
+ while ((ch = getopt(argc, argv, "d:a:h:g:l")) != -1) {
+ name = optarg; /* default */
switch (ch) {
default:
fprintf(stderr, "bad option %c %s", ch, optarg);
@@ -152,9 +153,14 @@ usage:
break;
case 'l':
nr_cmd = NETMAP_BDG_LIST;
+ if (optind < argc && argv[optind][0] == '-')
+ name = NULL;
break;
}
- name = optarg;
+ if (optind != argc) {
+ // fprintf(stderr, "optind %d argc %d\n", optind, argc);
+ goto usage;
+ }
}
if (argc == 1)
nr_cmd = NETMAP_BDG_LIST;
OpenPOWER on IntegriCloud