diff options
Diffstat (limited to 'share/man')
-rw-r--r-- | share/man/man4/Makefile | 1 | ||||
-rw-r--r-- | share/man/man4/netmap.4 | 300 |
2 files changed, 301 insertions, 0 deletions
diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile index e5fa955..0d5a780 100644 --- a/share/man/man4/Makefile +++ b/share/man/man4/Makefile @@ -253,6 +253,7 @@ MAN= aac.4 \ net80211.4 \ netgraph.4 \ netintro.4 \ + netmap.4 \ ${_nfe.4} \ ${_nfsmb.4} \ ng_async.4 \ diff --git a/share/man/man4/netmap.4 b/share/man/man4/netmap.4 new file mode 100644 index 0000000..8b646f9 --- /dev/null +++ b/share/man/man4/netmap.4 @@ -0,0 +1,300 @@ +.\" Copyright (c) 2011 Matteo Landi, Luigi Rizzo, Universita` di Pisa +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" This document is derived in part from the enet man page (enet.4) +.\" distributed with 4.3BSD Unix. +.\" +.\" $FreeBSD$ +.\" $Id: netmap.4 9662 2011-11-16 13:18:06Z luigi $: stable/8/share/man/man4/bpf.4 181694 2008-08-13 17:45:06Z ed $ +.\" +.Dd November 16, 2011 +.Dt NETMAP 4 +.Os +.Sh NAME +.Nm netmap +.Nd a framework for fast packet I/O +.Sh SYNOPSIS +.Cd device netmap +.Sh DESCRIPTION +.Nm +is a framework for fast and safe access to network devices +(reaching 14.88 Mpps at less than 1 GHz). +.Nm +uses memory mapped buffers and metadata +(buffer indexes and lengths) to communicate with the kernel, +which is in charge of validating information through +.Pa ioctl() +and +.Pa select()/poll(). +.Nm +can exploit the parallelism in multiqueue devices and +multicore systems. +.Pp +.Pp +.Nm +requires explicit support in device drivers. +For a list of supported devices, see the end of this manual page. +.Sh OPERATION +.Nm +clients must first open the +.Pa open("/dev/netmap") , +and then issue an +.Pa ioctl(...,NIOCREGIF,...) +to bind the file descriptor to a network device. +.Pp +When a device is put in +.Nm +mode, its data path is disconnected from the host stack. +The processes owning the file descriptor +can exchange packets with the device, or with the host stack, +through an mmapped memory region that contains pre-allocated +buffers and metadata. +.Pp +Non blocking I/O is done with special +.Pa ioctl()'s , +whereas the file descriptor can be passed to +.Pa select()/poll() +to be notified about incoming packet or available transmit buffers. +.Ss Data structures +All data structures for all devices in +.Nm +mode are in a memory +region shared by the kernel and all processes +who open +.Pa /dev/netmap +(NOTE: visibility may be restricted in future implementations). +All references between the shared data structure +are relative (offsets or indexes). Some macros help converting +them into actual pointers. +.Pp +The data structures in shared memory are the following: +.Pp +.Bl -tag -width XXX +.It Dv struct netmap_if (one per interface) +indicates the number of rings supported by an interface, their +sizes, and the offsets of the +.Pa netmap_rings +associated to the interface. +The offset of a +.Pa struct netmap_if +in the shared memory region is indicated by the +.Pa nr_offset +field in the structure returned by the +.Pa NIOCREGIF +(see below). +.Bd -literal +struct netmap_if { + char ni_name[IFNAMSIZ]; /* name of the interface. */ + const u_int ni_num_queues; /* number of hw ring pairs */ + const ssize_t ring_ofs[]; /* offset of tx and rx rings */ +}; +.Ed +.It Dv struct netmap_ring (one per ring) +contains the index of the current read or write slot (cur), +the number of slots available for reception or transmission (avail), +and an array of +.Pa slots +describing the buffers. +There is one ring pair for each of the N hardware ring pairs +supported by the card (numbered 0..N-1), plus +one ring pair (numbered N) for packets from/to the host stack. +.Bd -literal +struct netmap_ring { + const ssize_t buf_ofs; + const uint32_t num_slots; /* number of slots in the ring. */ + uint32_t avail; /* number of usable slots */ + uint32_t cur; /* 'current' index for the user side */ + + const uint16_t nr_buf_size; + uint16_t flags; + struct netmap_slot slot[0]; /* array of slots. */ +} +.Ed +.It Dv struct netmap_slot (one per packet) +contains the metadata for a packet: a buffer index (buf_idx), +a buffer length (len), and some flags. +.Bd -literal +struct netmap_slot { + uint32_t buf_idx; /* buffer index */ + uint16_t len; /* packet length */ + uint16_t flags; /* buf changed, etc. */ +#define NS_BUF_CHANGED 0x0001 /* must resync, buffer changed */ +#define NS_REPORT 0x0002 /* tell hw to report results + * e.g. by generating an interrupt + */ +}; +.Ed +.It Dv packet buffers +are fixed size (approximately 2k) buffers allocated by the kernel +that contain packet data. Buffers addresses are computed through +macros. +.El +.Pp +Some macros support the access to objects in the shared memory +region. In particular: +.Bd -literal +struct netmap_if *nifp; +... +struct netmap_ring *txring = NETMAP_TXRING(nifp, i); +struct netmap_ring *rxring = NETMAP_RXRING(nifp, i); +int i = txring->slot[txring->cur].buf_idx; +char *buf = NETMAP_BUF(txring, i); +.Ed +.Ss IOCTLS +.Pp +.Nm +supports some ioctl() to synchronize the state of the rings +between the kernel and the user processes, plus some +to query and configure the interface. +The former do not require any argument, whereas the latter +use a +.Pa struct netmap_req +defined as follows: +.Bd -literal +struct nmreq { + char nr_name[IFNAMSIZ]; + uint32_t nr_offset; /* nifp offset in the shared region */ + uint32_t nr_memsize; /* size of the shared region */ + uint32_t nr_numdescs; /* descriptors per queue */ + uint16_t nr_numqueues; + uint16_t nr_ringid; /* ring(s) we care about */ +#define NETMAP_HW_RING 0x4000 /* low bits indicate one hw ring */ +#define NETMAP_SW_RING 0x2000 /* we process the sw ring */ +#define NETMAP_NO_TX_POLL 0x1000 /* no gratuitous txsync on poll */ +#define NETMAP_RING_MASK 0xfff /* the actual ring number */ +}; + +.Ed +A device descriptor obtained through +.Pa /dev/netmap +also supports the ioctl supported by network devices. +.Pp +The netmap-specific +.Xr ioctl 2 +command codes below are defined in +.In net/netmap.h +and are: +.Bl -tag -width XXXX +.It Dv NIOCGINFO +returns information about the interface named in nr_name. +On return, nr_memsize indicates the size of the shared netmap +memory region (this is device-independent), +nr_numslots indicates how many buffers are in a ring, +nr_numrings indicates the number of rings supported by the hardware. +.Pp +If the device does not support netmap, the ioctl returns EINVAL. +.It Dv NIOCREGIF +puts the interface named in nr_name into netmap mode, disconnecting +it from the host stack, and/or defines which rings are controlled +through this file descriptor. +On return, it gives the same info as NIOCGINFO, and nr_ringid +indicates the identity of the rings controlled through the file +descriptor. +.Pp +Possible values for nr_ringid are +.Bl -tag -width XXXXX +.It 0 +default, all hardware rings +.It NETMAP_SW_RING +the ``host rings'' connecting to the host stack +.It NETMAP_HW_RING + i +the i-th hardware ring +.El +By default, a +.Nm poll +or +.Nm select +call pushes out any pending packets on the transmit ring, even if +no write events are specified. +The feature can be disabled by or-ing +.Nm NETMAP_NO_TX_SYNC +to nr_ringid. +But normally you should keep this feature unless you are using +separate file descriptors for the send and receive rings, because +otherwise packets are pushed out only if NETMAP_TXSYNC is called, +or the send queue is full. +.Pp +.Pa NIOCREGIF +can be used multiple times to change the association of a +file descriptor to a ring pair, always within the same device. +.It Dv NIOCUNREGIF +brings an interface back to normal mode. +.It Dv NIOCTXSYNC +tells the hardware of new packets to transmit, and updates the +number of slots available for transmission. +.It Dv NIOCRXSYNC +tells the hardware of consumed packets, and asks for newly available +packets. +.El +.Ss SYSTEM CALLS +.Nm +uses +.Nm select +and +.Nm poll +to wake up processes when significant events occur. +.Sh EXAMPLES +The following code implements a traffic generator +.Pp +.Bd -literal -compact +#include <net/netmap.h> +#include <net/netmap_user.h> +struct netmap_if *nifp; +struct netmap_ring *ring; +struct netmap_request nmr; + +fd = open("/dev/netmap", O_RDWR); +bzero(&nmr, sizeof(nmr)); +strcpy(nmr.nm_name, "ix0"); +ioctl(fd, NIOCREG, &nmr); +p = mmap(0, nmr.memsize, fd); +nifp = NETMAP_IF(p, nmr.offset); +ring = NETMAP_TXRING(nifp, 0); +fds.fd = fd; +fds.events = POLLOUT; +for (;;) { + poll(list, 1, -1); + while (ring->avail-- > 0) { + i = ring->cur; + buf = NETMAP_BUF(ring, ring->slot[i].buf_index); + ... prepare packet in buf ... + ring->slot[i].len = ... packet length ... + ring->cur = NETMAP_RING_NEXT(ring, i); + } +} +.Ed +.Sh SUPPORTED INTERFACES +.Nm +supports the following interfaces: +.Xr em 4 , +.Xr ixgbe 4 , +.Xr re 4 , +.Sh AUTHORS +The +.Nm +framework has been designed and implemented by +.An Luigi Rizzo +and +.An Matteo Landi +in 2011 at the Universita` di Pisa. |