summaryrefslogtreecommitdiffstats
path: root/sys/net
diff options
context:
space:
mode:
author: luigi <luigi@FreeBSD.org> 2012-02-27 19:05:01 +0000
committer: luigi <luigi@FreeBSD.org> 2012-02-27 19:05:01 +0000
commit: 3ac0fcfb9762b2fd4991f32bff09543ba13df0d0 (patch)
tree: a547096f4399bc66370c43d717a40e4b79eb8401 /sys/net
parent: 71d18727cc7b50dc4e7c4d02cab4232fd4b10711 (diff)
download: FreeBSD-src-3ac0fcfb9762b2fd4991f32bff09543ba13df0d0.zip
FreeBSD-src-3ac0fcfb9762b2fd4991f32bff09543ba13df0d0.tar.gz
A bunch of netmap fixes:
USERSPACE: 1. add support for devices with different number of rx and tx queues; 2. add better support for zero-copy operation, adding an extra field to the netmap ring to indicate how many buffers we have already processed but not yet released (with help from Eddie Kohler); 3. The two changes above unfortunately require an API change, so while at it add a version field and some spares to the ioctl() argument to help detect mismatches. 4. update the manual page for the two changes above; 5. update sample applications in tools/tools/netmap KERNEL: 1. simplify the internal structures moving the global wait queues to the 'struct netmap_adapter'; 2. simplify the functions that map kring<->nic ring indexes 3. normalize device-specific code, helps maintenance; 4. start exploring the impact of micro-optimizations (prefetch etc.) in the ixgbe driver. Using 'legacy' descriptors on the tx ring and prefetching slots gives about 20% speedup at 900 MHz. Another 7-10% would come from removing the explicit calls to bus_dmamap* in the core (they are effectively NOPs in this case, but it takes an expensive load of the per-buffer dma maps to figure out that they are all NULL). Rx performance not investigated. I am postponing the MFC so I can import a few more improvements before merging.
Diffstat (limited to 'sys/net')
-rw-r--r--sys/net/netmap.h143
-rw-r--r--sys/net/netmap_user.h18
2 files changed, 92 insertions, 69 deletions
diff --git a/sys/net/netmap.h b/sys/net/netmap.h
index 0ba1537..888c15b 100644
--- a/sys/net/netmap.h
+++ b/sys/net/netmap.h
@@ -32,11 +32,13 @@
/*
* $FreeBSD$
- * $Id: netmap.h 9753 2011-11-28 15:10:43Z luigi $
+ * $Id: netmap.h 10601 2012-02-21 16:40:14Z luigi $
*
- * This header contains the definitions of the constants and the
- * structures needed by the ``netmap'' module, both kernel and
- * userspace.
+ * Definitions of constants and the structures used by the netmap
+ * framework, for the part visible to both kernel and userspace.
+ * Detailed info on netmap is available with "man netmap" or at
+ *
+ * http://info.iet.unipi.it/~luigi/netmap/
*/
#ifndef _NET_NETMAP_H_
@@ -48,14 +50,8 @@
* The data structures used by netmap are shown below. Those in
* capital letters are in an mmap()ed area shared with userspace,
* while others are private to the kernel.
- * Shared structures do not contain pointers but only relative
+ * Shared structures do not contain pointers but only memory
* offsets, so that addressing is portable between kernel and userspace.
- *
- * The 'softc' of each interface is extended with a struct netmap_adapter
- * containing information to support netmap operation. In addition to
- * the fixed fields, it has two pointers to reach the arrays of
- * 'struct netmap_kring' which in turn reaches the various
- * struct netmap_ring, shared with userspace.
softc
@@ -67,19 +63,22 @@
+----------------+<------+
|(netmap_adapter)|
| | netmap_kring
-| tx_rings *--------------------------------->+-------------+
-| | netmap_kring | ring *---------> ...
-| rx_rings *---------->+--------------+ | nr_hwcur |
-+----------------+ | ring *-------+ | nr_hwavail |
- | nr_hwcur | | | selinfo |
- | nr_hwavail | | +-------------+
- | selinfo | | | ... |
- +--------------+ | (na_num_rings+1 entries)
- | .... | | | |
- (na_num_rings+1 entries) +-------------+
- | | |
- +--------------+ |
- | NETMAP_RING
+| tx_rings *--------------------------------->+---------------+
+| | netmap_kring | ring *---------.
+| rx_rings *--------->+---------------+ | nr_hwcur | |
++----------------+ | ring *--------. | nr_hwavail | V
+ | nr_hwcur | | | selinfo | |
+ | nr_hwavail | | +---------------+ .
+ | selinfo | | | ... | .
+ +---------------+ | |(ntx+1 entries)|
+ | .... | | | |
+ |(nrx+1 entries)| | +---------------+
+ | | |
+ KERNEL +---------------+ |
+ |
+ ====================================================================
+ |
+ USERSPACE | NETMAP_RING
+---->+-------------+
/ | cur |
NETMAP_IF (nifp, one per file desc.) / | avail |
@@ -100,16 +99,23 @@
| txring_ofs[n] |
+---------------+
- * The NETMAP_RING is the shadow ring that mirrors the NIC rings.
+ * The private descriptor ('softc' or 'adapter') of each interface
+ * is extended with a "struct netmap_adapter" containing netmap-related
+ * info (see description in dev/netmap/netmap_kernel.h).
+ * Among other things, tx_rings and rx_rings point to the arrays of
+ * "struct netmap_kring" which in turn reach the various
+ * "struct netmap_ring", shared with userspace.
+
+ * The NETMAP_RING is the userspace-visible replica of the NIC ring.
* Each slot has the index of a buffer, its length and some flags.
* In user space, the buffer address is computed as
- * (char *)ring + buf_ofs + index*MAX_BUF_SIZE
+ * (char *)ring + buf_ofs + index*NETMAP_BUF_SIZE
* In the kernel, buffers do not necessarily need to be contiguous,
* and the virtual and physical addresses are derived through
- * a lookup table. When userspace wants to use a different buffer
- * in a location, it must set the NS_BUF_CHANGED flag to make
- * sure that the kernel recomputes updates the hardware ring and
- * other fields (bus_dmamap, etc.) as needed.
+ * a lookup table.
+ * To associate a different buffer to a slot, applications must
+ * write the new index in buf_idx, and set NS_BUF_CHANGED flag to
+ * make sure that the kernel updates the hardware ring as needed.
*
* Normally the driver is not requested to report the result of
* transmissions (this can dramatically speed up operation).
@@ -133,13 +139,16 @@ struct netmap_slot {
*
* In TX rings:
* avail indicates the number of slots available for transmission.
- * It is decremented by the application when it appends a
- * packet, and set to nr_hwavail (see below) on a
- * NIOCTXSYNC to reflect the actual state of the queue
- * (keeping track of completed transmissions).
- * cur indicates the empty slot to use for the next packet
+ * It is updated by the kernel after every netmap system call.
+ * It MUST BE decremented by the application when it appends a
+ * packet.
+ * cur indicates the slot to use for the next packet
* to send (i.e. the "tail" of the queue).
- * It is incremented by the application.
+ * It MUST BE incremented by the application before
+ * netmap system calls to reflect the number of newly
+ * sent packets.
+ * It is checked by the kernel on netmap system calls
+ * (normally unmodified by the kernel unless invalid).
*
* The kernel side of netmap uses two additional fields in its own
* private ring structure, netmap_kring:
@@ -153,12 +162,17 @@ struct netmap_slot {
*
* In RX rings:
* avail is the number of packets available (possibly 0).
- * It is decremented by the software when it consumes
- * a packet, and set to nr_hwavail on a NIOCRXSYNC
- * cur indicates the first slot that contains a packet
- * (the "head" of the queue).
- * It is incremented by the software when it consumes
+ * It MUST BE decremented by the application when it consumes
+ * a packet, and it is updated to nr_hwavail on a NIOCRXSYNC
+ * cur indicates the first slot that contains a packet not
+ * processed yet (the "head" of the queue).
+ * It MUST BE incremented by the software when it consumes
* a packet.
+ * reserved indicates the number of buffers before 'cur'
+ * that the application has still in use. Normally 0,
+ * it MUST BE incremented by the application when it
+ * does not return the buffer immediately, and decremented
+ * when the buffer is finally freed.
*
* The kernel side of netmap uses two additional fields in the kring:
* nr_hwcur is a copy of nr_cur on an NIOCRXSYNC
@@ -182,7 +196,8 @@ struct netmap_ring {
const ssize_t buf_ofs;
const uint32_t num_slots; /* number of slots in the ring. */
uint32_t avail; /* number of usable slots */
- uint32_t cur; /* 'current' r/w position */
+ uint32_t cur; /* 'current' r/w position */
+ uint32_t reserved; /* not refilled before current */
const uint16_t nr_buf_size;
uint16_t flags;
@@ -191,7 +206,7 @@ struct netmap_ring {
struct timeval ts; /* time of last *sync() */
/* the slots follow. This struct has variable size */
- struct netmap_slot slot[0]; /* array of slots. */
+ struct netmap_slot slot[0]; /* array of slots. */
};
@@ -204,24 +219,23 @@ struct netmap_ring {
* nmr_queueid passed on the ioctl.
*/
struct netmap_if {
- char ni_name[IFNAMSIZ]; /* name of the interface. */
- const u_int ni_version; /* API version, currently unused */
- const u_int ni_num_queues; /* number of queue pairs (TX/RX). */
- const u_int ni_rx_queues; /* if zero, use ni_num_queues */
+ char ni_name[IFNAMSIZ]; /* name of the interface. */
+ const u_int ni_version; /* API version, currently unused */
+ const u_int ni_rx_queues; /* number of rx queues */
+ const u_int ni_tx_queues; /* if zero, same as ni_rx_queues */
/*
- * the following array contains the offset of the
- * each netmap ring from this structure. The first num_queues+1
- * refer to the tx rings, the next n+1 refer to the rx rings.
+ * The following array contains the offset of each netmap ring
+ * from this structure. The first ni_tx_queues+1 entries refer
+ * to the tx rings, the next ni_rx_queues+1 refer to the rx rings
+ * (the last entry in each block refers to the host stack rings).
* The area is filled up by the kernel on NIOCREGIF,
* and then only read by userspace code.
- * entries 0..ni_num_queues-1 indicate the hardware queues,
- * entry ni_num_queues is the queue from/to the stack.
*/
const ssize_t ring_ofs[0];
};
-#ifndef IFCAP_NETMAP /* this should go in net/if.h */
-#define IFCAP_NETMAP 0x100000
+#ifndef IFCAP_NETMAP
+#define IFCAP_NETMAP 0x100000 /* used on linux */
#endif
#ifndef NIOCREGIF
@@ -246,18 +260,29 @@ struct netmap_if {
*/
struct nmreq {
char nr_name[IFNAMSIZ];
- uint32_t nr_version; /* API version (unused) */
+ uint32_t nr_version; /* API version */
+#define NETMAP_API 2 /* current version */
uint32_t nr_offset; /* nifp offset in the shared region */
uint32_t nr_memsize; /* size of the shared region */
- uint32_t nr_numslots; /* descriptors per queue */
- uint16_t nr_numrings;
+ uint32_t nr_tx_slots; /* slots in tx rings */
+ uint32_t nr_rx_slots; /* slots in rx rings */
+ uint16_t nr_tx_rings; /* number of tx rings */
+ uint16_t nr_rx_rings; /* number of rx rings */
uint16_t nr_ringid; /* ring(s) we care about */
#define NETMAP_HW_RING 0x4000 /* low bits indicate one hw ring */
-#define NETMAP_SW_RING 0x2000 /* we process the sw ring */
-#define NETMAP_NO_TX_POLL 0x1000 /* no gratuitous txsync on poll */
+#define NETMAP_SW_RING 0x2000 /* process the sw ring */
+#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */
#define NETMAP_RING_MASK 0xfff /* the ring number */
+ uint16_t spare1;
+ uint32_t spare2[4];
};
+/*
+ * FreeBSD uses the size value embedded in the _IOWR to determine
+ * how much to copy in/out. So we need it to match the actual
+ * data structure we pass. We put some spares in the structure
+ * to ease compatibility with other versions
+ */
#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */
#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */
#define NIOCUNREGIF _IO('i', 147) /* interface unregister */
diff --git a/sys/net/netmap_user.h b/sys/net/netmap_user.h
index 6449045..be66e7a 100644
--- a/sys/net/netmap_user.h
+++ b/sys/net/netmap_user.h
@@ -32,14 +32,13 @@
/*
* $FreeBSD$
- * $Id: netmap_user.h 9495 2011-10-18 15:28:23Z luigi $
+ * $Id: netmap_user.h 10597 2012-02-21 05:08:32Z luigi $
*
* This header contains the macros used to manipulate netmap structures
* and packets in userspace. See netmap(4) for more information.
*
- * The address of the struct netmap_if, say nifp, is determined
- * by the value returned from ioctl(.., NIOCREG, ...) and the mmap
- * region:
+ * The address of the struct netmap_if, say nifp, is computed from the
+ * value returned from ioctl(.., NIOCREGIF, ...) and the mmap region:
* ioctl(fd, NIOCREGIF, &req);
* mem = mmap(0, ... );
* nifp = NETMAP_IF(mem, req.nr_offset);
@@ -71,21 +70,20 @@
#define NETMAP_RXRING(nifp, index) \
((struct netmap_ring *)((char *)(nifp) + \
- (nifp)->ring_ofs[index + (nifp)->ni_num_queues+1] ) )
+ (nifp)->ring_ofs[index + (nifp)->ni_tx_queues+1] ) )
#define NETMAP_BUF(ring, index) \
((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))
+#define NETMAP_BUF_IDX(ring, buf) \
+ ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
+ (ring)->nr_buf_size )
+
#define NETMAP_RING_NEXT(r, i) \
((i)+1 == (r)->num_slots ? 0 : (i) + 1 )
/*
* Return 1 if the given tx ring is empty.
- *
- * @r netmap_ring descriptor pointer.
- * Special case, a negative value in hwavail indicates that the
- * transmit queue is idle.
- * XXX revise
*/
#define NETMAP_TX_RING_EMPTY(r) ((r)->avail >= (r)->num_slots - 1)
OpenPOWER on IntegriCloud