summaryrefslogtreecommitdiffstats
path: root/share/man/man4/netmap.4
diff options
context:
space:
mode:
Diffstat (limited to 'share/man/man4/netmap.4')
-rw-r--r--share/man/man4/netmap.4300
1 files changed, 300 insertions, 0 deletions
diff --git a/share/man/man4/netmap.4 b/share/man/man4/netmap.4
new file mode 100644
index 0000000..8b646f9
--- /dev/null
+++ b/share/man/man4/netmap.4
@@ -0,0 +1,300 @@
+.\" Copyright (c) 2011 Matteo Landi, Luigi Rizzo, Universita` di Pisa
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" This document is derived in part from the enet man page (enet.4)
+.\" distributed with 4.3BSD Unix.
+.\"
+.\" $FreeBSD$
+.\" $Id: netmap.4 9662 2011-11-16 13:18:06Z luigi $: stable/8/share/man/man4/bpf.4 181694 2008-08-13 17:45:06Z ed $
+.\"
+.Dd November 16, 2011
+.Dt NETMAP 4
+.Os
+.Sh NAME
+.Nm netmap
+.Nd a framework for fast packet I/O
+.Sh SYNOPSIS
+.Cd device netmap
+.Sh DESCRIPTION
+.Nm
+is a framework for fast and safe access to network devices
+(reaching 14.88 Mpps at less than 1 GHz).
+.Nm
+uses memory mapped buffers and metadata
+(buffer indexes and lengths) to communicate with the kernel,
+which is in charge of validating information through
+.Pa ioctl()
+and
+.Pa select()/poll().
+.Nm
+can exploit the parallelism in multiqueue devices and
+multicore systems.
+.Pp
+.Pp
+.Nm
+requires explicit support in device drivers.
+For a list of supported devices, see the end of this manual page.
+.Sh OPERATION
+.Nm
+clients must first open the
+.Pa open("/dev/netmap") ,
+and then issue an
+.Pa ioctl(...,NIOCREGIF,...)
+to bind the file descriptor to a network device.
+.Pp
+When a device is put in
+.Nm
+mode, its data path is disconnected from the host stack.
+The processes owning the file descriptor
+can exchange packets with the device, or with the host stack,
+through an mmapped memory region that contains pre-allocated
+buffers and metadata.
+.Pp
+Non blocking I/O is done with special
+.Pa ioctl()'s ,
+whereas the file descriptor can be passed to
+.Pa select()/poll()
+to be notified about incoming packet or available transmit buffers.
+.Ss Data structures
+All data structures for all devices in
+.Nm
+mode are in a memory
+region shared by the kernel and all processes
+who open
+.Pa /dev/netmap
+(NOTE: visibility may be restricted in future implementations).
+All references between the shared data structure
+are relative (offsets or indexes). Some macros help converting
+them into actual pointers.
+.Pp
+The data structures in shared memory are the following:
+.Pp
+.Bl -tag -width XXX
+.It Dv struct netmap_if (one per interface)
+indicates the number of rings supported by an interface, their
+sizes, and the offsets of the
+.Pa netmap_rings
+associated to the interface.
+The offset of a
+.Pa struct netmap_if
+in the shared memory region is indicated by the
+.Pa nr_offset
+field in the structure returned by the
+.Pa NIOCREGIF
+(see below).
+.Bd -literal
+struct netmap_if {
+ char ni_name[IFNAMSIZ]; /* name of the interface. */
+ const u_int ni_num_queues; /* number of hw ring pairs */
+ const ssize_t ring_ofs[]; /* offset of tx and rx rings */
+};
+.Ed
+.It Dv struct netmap_ring (one per ring)
+contains the index of the current read or write slot (cur),
+the number of slots available for reception or transmission (avail),
+and an array of
+.Pa slots
+describing the buffers.
+There is one ring pair for each of the N hardware ring pairs
+supported by the card (numbered 0..N-1), plus
+one ring pair (numbered N) for packets from/to the host stack.
+.Bd -literal
+struct netmap_ring {
+ const ssize_t buf_ofs;
+ const uint32_t num_slots; /* number of slots in the ring. */
+ uint32_t avail; /* number of usable slots */
+ uint32_t cur; /* 'current' index for the user side */
+
+ const uint16_t nr_buf_size;
+ uint16_t flags;
+ struct netmap_slot slot[0]; /* array of slots. */
+}
+.Ed
+.It Dv struct netmap_slot (one per packet)
+contains the metadata for a packet: a buffer index (buf_idx),
+a buffer length (len), and some flags.
+.Bd -literal
+struct netmap_slot {
+ uint32_t buf_idx; /* buffer index */
+ uint16_t len; /* packet length */
+ uint16_t flags; /* buf changed, etc. */
+#define NS_BUF_CHANGED 0x0001 /* must resync, buffer changed */
+#define NS_REPORT 0x0002 /* tell hw to report results
+ * e.g. by generating an interrupt
+ */
+};
+.Ed
+.It Dv packet buffers
+are fixed size (approximately 2k) buffers allocated by the kernel
+that contain packet data. Buffers addresses are computed through
+macros.
+.El
+.Pp
+Some macros support the access to objects in the shared memory
+region. In particular:
+.Bd -literal
+struct netmap_if *nifp;
+...
+struct netmap_ring *txring = NETMAP_TXRING(nifp, i);
+struct netmap_ring *rxring = NETMAP_RXRING(nifp, i);
+int i = txring->slot[txring->cur].buf_idx;
+char *buf = NETMAP_BUF(txring, i);
+.Ed
+.Ss IOCTLS
+.Pp
+.Nm
+supports some ioctl() to synchronize the state of the rings
+between the kernel and the user processes, plus some
+to query and configure the interface.
+The former do not require any argument, whereas the latter
+use a
+.Pa struct netmap_req
+defined as follows:
+.Bd -literal
+struct nmreq {
+ char nr_name[IFNAMSIZ];
+ uint32_t nr_offset; /* nifp offset in the shared region */
+ uint32_t nr_memsize; /* size of the shared region */
+ uint32_t nr_numdescs; /* descriptors per queue */
+ uint16_t nr_numqueues;
+ uint16_t nr_ringid; /* ring(s) we care about */
+#define NETMAP_HW_RING 0x4000 /* low bits indicate one hw ring */
+#define NETMAP_SW_RING 0x2000 /* we process the sw ring */
+#define NETMAP_NO_TX_POLL 0x1000 /* no gratuitous txsync on poll */
+#define NETMAP_RING_MASK 0xfff /* the actual ring number */
+};
+
+.Ed
+A device descriptor obtained through
+.Pa /dev/netmap
+also supports the ioctl supported by network devices.
+.Pp
+The netmap-specific
+.Xr ioctl 2
+command codes below are defined in
+.In net/netmap.h
+and are:
+.Bl -tag -width XXXX
+.It Dv NIOCGINFO
+returns information about the interface named in nr_name.
+On return, nr_memsize indicates the size of the shared netmap
+memory region (this is device-independent),
+nr_numslots indicates how many buffers are in a ring,
+nr_numrings indicates the number of rings supported by the hardware.
+.Pp
+If the device does not support netmap, the ioctl returns EINVAL.
+.It Dv NIOCREGIF
+puts the interface named in nr_name into netmap mode, disconnecting
+it from the host stack, and/or defines which rings are controlled
+through this file descriptor.
+On return, it gives the same info as NIOCGINFO, and nr_ringid
+indicates the identity of the rings controlled through the file
+descriptor.
+.Pp
+Possible values for nr_ringid are
+.Bl -tag -width XXXXX
+.It 0
+default, all hardware rings
+.It NETMAP_SW_RING
+the ``host rings'' connecting to the host stack
+.It NETMAP_HW_RING + i
+the i-th hardware ring
+.El
+By default, a
+.Nm poll
+or
+.Nm select
+call pushes out any pending packets on the transmit ring, even if
+no write events are specified.
+The feature can be disabled by or-ing
+.Nm NETMAP_NO_TX_SYNC
+to nr_ringid.
+But normally you should keep this feature unless you are using
+separate file descriptors for the send and receive rings, because
+otherwise packets are pushed out only if NETMAP_TXSYNC is called,
+or the send queue is full.
+.Pp
+.Pa NIOCREGIF
+can be used multiple times to change the association of a
+file descriptor to a ring pair, always within the same device.
+.It Dv NIOCUNREGIF
+brings an interface back to normal mode.
+.It Dv NIOCTXSYNC
+tells the hardware of new packets to transmit, and updates the
+number of slots available for transmission.
+.It Dv NIOCRXSYNC
+tells the hardware of consumed packets, and asks for newly available
+packets.
+.El
+.Ss SYSTEM CALLS
+.Nm
+uses
+.Nm select
+and
+.Nm poll
+to wake up processes when significant events occur.
+.Sh EXAMPLES
+The following code implements a traffic generator
+.Pp
+.Bd -literal -compact
+#include <net/netmap.h>
+#include <net/netmap_user.h>
+struct netmap_if *nifp;
+struct netmap_ring *ring;
+struct netmap_request nmr;
+
+fd = open("/dev/netmap", O_RDWR);
+bzero(&nmr, sizeof(nmr));
+strcpy(nmr.nm_name, "ix0");
+ioctl(fd, NIOCREG, &nmr);
+p = mmap(0, nmr.memsize, fd);
+nifp = NETMAP_IF(p, nmr.offset);
+ring = NETMAP_TXRING(nifp, 0);
+fds.fd = fd;
+fds.events = POLLOUT;
+for (;;) {
+ poll(list, 1, -1);
+ while (ring->avail-- > 0) {
+ i = ring->cur;
+ buf = NETMAP_BUF(ring, ring->slot[i].buf_index);
+ ... prepare packet in buf ...
+ ring->slot[i].len = ... packet length ...
+ ring->cur = NETMAP_RING_NEXT(ring, i);
+ }
+}
+.Ed
+.Sh SUPPORTED INTERFACES
+.Nm
+supports the following interfaces:
+.Xr em 4 ,
+.Xr ixgbe 4 ,
+.Xr re 4 ,
+.Sh AUTHORS
+The
+.Nm
+framework has been designed and implemented by
+.An Luigi Rizzo
+and
+.An Matteo Landi
+in 2011 at the Universita` di Pisa.
OpenPOWER on IntegriCloud