diff options
author | melifaro <melifaro@FreeBSD.org> | 2014-08-23 14:58:31 +0000 |
---|---|---|
committer | melifaro <melifaro@FreeBSD.org> | 2014-08-23 14:58:31 +0000 |
commit | cf94663e69b2c927e4a44dcb922c93483f11bc29 (patch) | |
tree | f43a461c97f3db054606f6367939a61652f0db97 /sys/dev/netmap/netmap_monitor.c | |
parent | 2e65f120c886a9d09b274b1953783df2b995e799 (diff) | |
parent | 19be009a4f8eb0d239ec3e465b0a9b2a2947dcf8 (diff) | |
download | FreeBSD-src-cf94663e69b2c927e4a44dcb922c93483f11bc29.zip FreeBSD-src-cf94663e69b2c927e4a44dcb922c93483f11bc29.tar.gz |
Sync to HEAD@r270409.
Diffstat (limited to 'sys/dev/netmap/netmap_monitor.c')
-rw-r--r-- | sys/dev/netmap/netmap_monitor.c | 498 |
1 files changed, 498 insertions, 0 deletions
diff --git a/sys/dev/netmap/netmap_monitor.c b/sys/dev/netmap/netmap_monitor.c new file mode 100644 index 0000000..485c370 --- /dev/null +++ b/sys/dev/netmap/netmap_monitor.c @@ -0,0 +1,498 @@ +/* + * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $FreeBSD$ + * + * Monitors + * + * netmap monitors can be used to do zero-copy monitoring of network traffic + * on another adapter, when the latter adapter is working in netmap mode. + * + * Monitors offer to userspace the same interface as any other netmap port, + * with as many pairs of netmap rings as the monitored adapter. + * However, only the rx rings are actually used. Each monitor rx ring receives + * the traffic transiting on both the tx and rx corresponding rings in the + * monitored adapter. During registration, the user can choose if she wants + * to intercept tx only, rx only, or both tx and rx traffic. + * + * The monitor only sees the frames after they have been consumed in the + * monitored adapter: + * + * - For tx traffic, this is after the slots containing the frames have been + * marked as free. Note that this may happen at a considerably delay after + * frame transmission, since freeing of slots is often done lazily. + * + * - For rx traffic, this is after the consumer on the monitored adapter + * has released them. In most cases, the consumer is a userspace + * application which may have modified the frame contents. + * + * If the monitor is not able to cope with the stream of frames, excess traffic + * will be dropped. + * + * Each ring can be monitored by at most one monitor. This may change in the + * future, if we implement monitor chaining. + * + */ + + +#if defined(__FreeBSD__) +#include <sys/cdefs.h> /* prerequisite */ + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/param.h> /* defines used in kernel.h */ +#include <sys/kernel.h> /* types used in module initialization */ +#include <sys/malloc.h> +#include <sys/poll.h> +#include <sys/lock.h> +#include <sys/rwlock.h> +#include <sys/selinfo.h> +#include <sys/sysctl.h> +#include <sys/socket.h> /* sockaddrs */ +#include <net/if.h> +#include <net/if_var.h> +#include <machine/bus.h> /* bus_dmamap_* */ +#include <sys/refcount.h> + + +#elif defined(linux) + +#include "bsd_glue.h" + +#elif defined(__APPLE__) + +#warning OSX support is only partial +#include "osx_glue.h" + +#else + +#error Unsupported platform + +#endif /* unsupported */ + +/* + * common headers + */ + +#include <net/netmap.h> +#include <dev/netmap/netmap_kern.h> +#include <dev/netmap/netmap_mem2.h> + +#ifdef WITH_MONITOR + +#define NM_MONITOR_MAXSLOTS 4096 + +/* monitor works by replacing the nm_sync callbacks in the monitored rings. + * The actions to be performed are the same on both tx and rx rings, so we + * have collected them here + */ +static int +netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr) +{ + struct netmap_monitor_adapter *mna = kring->monitor; + struct netmap_kring *mkring = &mna->up.rx_rings[kring->ring_id]; + struct netmap_ring *ring = kring->ring, *mring = mkring->ring; + int error; + int rel_slots, free_slots, busy; + u_int beg, end, i; + u_int lim = kring->nkr_num_slots - 1, + mlim = mkring->nkr_num_slots - 1; + + /* get the relased slots (rel_slots) */ + beg = *ringptr; + error = kring->save_sync(kring, flags); + if (error) + return error; + end = *ringptr; + rel_slots = end - beg; + if (rel_slots < 0) + rel_slots += kring->nkr_num_slots; + + if (!rel_slots) { + return 0; + } + + /* we need to lock the monitor receive ring, since it + * is the target of bot tx and rx traffic from the monitored + * adapter + */ + mtx_lock(&mkring->q_lock); + /* get the free slots available on the monitor ring */ + i = mkring->nr_hwtail; + busy = i - mkring->nr_hwcur; + if (busy < 0) + busy += mkring->nkr_num_slots; + free_slots = mlim - busy; + + if (!free_slots) { + mtx_unlock(&mkring->q_lock); + return 0; + } + + /* swap min(free_slots, rel_slots) slots */ + if (free_slots < rel_slots) { + beg += (rel_slots - free_slots); + if (beg > lim) + beg = 0; + rel_slots = free_slots; + } + + for ( ; rel_slots; rel_slots--) { + struct netmap_slot *s = &ring->slot[beg]; + struct netmap_slot *ms = &mring->slot[i]; + uint32_t tmp; + + tmp = ms->buf_idx; + ms->buf_idx = s->buf_idx; + s->buf_idx = tmp; + + tmp = ms->len; + ms->len = s->len; + s->len = tmp; + + s->flags |= NS_BUF_CHANGED; + + beg = nm_next(beg, lim); + i = nm_next(i, mlim); + + } + wmb(); + mkring->nr_hwtail = i; + + mtx_unlock(&mkring->q_lock); + /* notify the new frames to the monitor */ + mna->up.nm_notify(&mna->up, mkring->ring_id, NR_RX, 0); + return 0; +} + +/* callback used to replace the nm_sync callback in the monitored tx rings */ +static int +netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags) +{ + ND("%s %x", kring->name, flags); + return netmap_monitor_parent_sync(kring, flags, &kring->nr_hwtail); +} + +/* callback used to replace the nm_sync callback in the monitored rx rings */ +static int +netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags) +{ + ND("%s %x", kring->name, flags); + return netmap_monitor_parent_sync(kring, flags, &kring->rcur); +} + +/* nm_sync callback for the monitor's own tx rings. + * This makes no sense and always returns error + */ +static int +netmap_monitor_txsync(struct netmap_kring *kring, int flags) +{ + D("%s %x", kring->name, flags); + return EIO; +} + +/* nm_sync callback for the monitor's own rx rings. + * Note that the lock in netmap_monitor_parent_sync only protects + * writers among themselves. Synchronization between writers + * (i.e., netmap_monitor_parent_txsync and netmap_monitor_parent_rxsync) + * and readers (i.e., netmap_monitor_rxsync) relies on memory barriers. + */ +static int +netmap_monitor_rxsync(struct netmap_kring *kring, int flags) +{ + ND("%s %x", kring->name, flags); + kring->nr_hwcur = kring->rcur; + rmb(); + nm_rxsync_finalize(kring); + return 0; +} + +/* nm_krings_create callbacks for monitors. + * We could use the default netmap_hw_krings_monitor, but + * we don't need the mbq. + */ +static int +netmap_monitor_krings_create(struct netmap_adapter *na) +{ + return netmap_krings_create(na, 0); +} + + +/* nm_register callback for monitors. + * + * On registration, replace the nm_sync callbacks in the monitored + * rings with our own, saving the previous ones in the monitored + * rings themselves, where they are used by netmap_monitor_parent_sync. + * + * On de-registration, restore the original callbacks. We need to + * stop traffic while we are doing this, since the monitored adapter may + * have already started executing a netmap_monitor_parent_sync + * and may not like the kring->save_sync pointer to become NULL. + */ +static int +netmap_monitor_reg(struct netmap_adapter *na, int onoff) +{ + struct netmap_monitor_adapter *mna = + (struct netmap_monitor_adapter *)na; + struct netmap_priv_d *priv = &mna->priv; + struct netmap_adapter *pna = priv->np_na; + struct netmap_kring *kring; + int i; + + ND("%p: onoff %d", na, onoff); + if (onoff) { + if (!nm_netmap_on(pna)) { + /* parent left netmap mode, fatal */ + return ENXIO; + } + if (mna->flags & NR_MONITOR_TX) { + for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) { + kring = &pna->tx_rings[i]; + kring->save_sync = kring->nm_sync; + kring->nm_sync = netmap_monitor_parent_txsync; + } + } + if (mna->flags & NR_MONITOR_RX) { + for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) { + kring = &pna->rx_rings[i]; + kring->save_sync = kring->nm_sync; + kring->nm_sync = netmap_monitor_parent_rxsync; + } + } + na->na_flags |= NAF_NETMAP_ON; + } else { + if (!nm_netmap_on(pna)) { + /* parent left netmap mode, nothing to restore */ + return 0; + } + na->na_flags &= ~NAF_NETMAP_ON; + if (mna->flags & NR_MONITOR_TX) { + for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) { + netmap_set_txring(pna, i, 1 /* stopped */); + kring = &pna->tx_rings[i]; + kring->nm_sync = kring->save_sync; + kring->save_sync = NULL; + netmap_set_txring(pna, i, 0 /* enabled */); + } + } + if (mna->flags & NR_MONITOR_RX) { + for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) { + netmap_set_rxring(pna, i, 1 /* stopped */); + kring = &pna->rx_rings[i]; + kring->nm_sync = kring->save_sync; + kring->save_sync = NULL; + netmap_set_rxring(pna, i, 0 /* enabled */); + } + } + } + return 0; +} +/* nm_krings_delete callback for monitors */ +static void +netmap_monitor_krings_delete(struct netmap_adapter *na) +{ + netmap_krings_delete(na); +} + + +/* nm_dtor callback for monitors */ +static void +netmap_monitor_dtor(struct netmap_adapter *na) +{ + struct netmap_monitor_adapter *mna = + (struct netmap_monitor_adapter *)na; + struct netmap_priv_d *priv = &mna->priv; + struct netmap_adapter *pna = priv->np_na; + int i; + + ND("%p", na); + if (nm_netmap_on(pna)) { + /* parent still in netmap mode, mark its krings as free */ + if (mna->flags & NR_MONITOR_TX) { + for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) { + pna->tx_rings[i].monitor = NULL; + } + } + if (mna->flags & NR_MONITOR_RX) { + for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) { + pna->rx_rings[i].monitor = NULL; + } + } + } + netmap_adapter_put(pna); +} + + +/* check if nmr is a request for a monitor adapter that we can satisfy */ +int +netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create) +{ + struct nmreq pnmr; + struct netmap_adapter *pna; /* parent adapter */ + struct netmap_monitor_adapter *mna; + int i, error; + + if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) { + ND("not a monitor"); + return 0; + } + /* this is a request for a monitor adapter */ + + D("flags %x", nmr->nr_flags); + + mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO); + if (mna == NULL) { + D("memory error"); + return ENOMEM; + } + + /* first, try to find the adapter that we want to monitor + * We use the same nmr, after we have turned off the monitor flags. + * In this way we can potentially monitor everything netmap understands, + * except other monitors. + */ + memcpy(&pnmr, nmr, sizeof(pnmr)); + pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX); + error = netmap_get_na(&pnmr, &pna, create); + if (error) { + D("parent lookup failed: %d", error); + return error; + } + D("found parent: %s", pna->name); + + if (!nm_netmap_on(pna)) { + /* parent not in netmap mode */ + /* XXX we can wait for the parent to enter netmap mode, + * by intercepting its nm_register callback (2014-03-16) + */ + D("%s not in netmap mode", pna->name); + error = EINVAL; + goto put_out; + } + + /* grab all the rings we need in the parent */ + mna->priv.np_na = pna; + error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags); + if (error) { + D("ringid error"); + goto put_out; + } + if (nmr->nr_flags & NR_MONITOR_TX) { + for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) { + struct netmap_kring *kring = &pna->tx_rings[i]; + if (kring->monitor) { + error = EBUSY; + D("ring busy"); + goto release_out; + } + kring->monitor = mna; + } + } + if (nmr->nr_flags & NR_MONITOR_RX) { + for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) { + struct netmap_kring *kring = &pna->rx_rings[i]; + if (kring->monitor) { + error = EBUSY; + D("ring busy"); + goto release_out; + } + kring->monitor = mna; + } + } + + snprintf(mna->up.name, sizeof(mna->up.name), "mon:%s", pna->name); + + /* the monitor supports the host rings iff the parent does */ + mna->up.na_flags = (pna->na_flags & NAF_HOST_RINGS); + mna->up.nm_txsync = netmap_monitor_txsync; + mna->up.nm_rxsync = netmap_monitor_rxsync; + mna->up.nm_register = netmap_monitor_reg; + mna->up.nm_dtor = netmap_monitor_dtor; + mna->up.nm_krings_create = netmap_monitor_krings_create; + mna->up.nm_krings_delete = netmap_monitor_krings_delete; + mna->up.nm_mem = pna->nm_mem; + mna->up.na_lut = pna->na_lut; + mna->up.na_lut_objtotal = pna->na_lut_objtotal; + mna->up.na_lut_objsize = pna->na_lut_objsize; + + mna->up.num_tx_rings = 1; // XXX we don't need it, but field can't be zero + /* we set the number of our rx_rings to be max(num_rx_rings, num_rx_rings) + * in the parent + */ + mna->up.num_rx_rings = pna->num_rx_rings; + if (pna->num_tx_rings > pna->num_rx_rings) + mna->up.num_rx_rings = pna->num_tx_rings; + /* by default, the number of slots is the same as in + * the parent rings, but the user may ask for a different + * number + */ + mna->up.num_tx_desc = nmr->nr_tx_slots; + nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc, + 1, NM_MONITOR_MAXSLOTS, NULL); + mna->up.num_rx_desc = nmr->nr_rx_slots; + nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc, + 1, NM_MONITOR_MAXSLOTS, NULL); + error = netmap_attach_common(&mna->up); + if (error) { + D("attach_common error"); + goto release_out; + } + + /* remember the traffic directions we have to monitor */ + mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)); + + *na = &mna->up; + netmap_adapter_get(*na); + + /* write the configuration back */ + nmr->nr_tx_rings = mna->up.num_tx_rings; + nmr->nr_rx_rings = mna->up.num_rx_rings; + nmr->nr_tx_slots = mna->up.num_tx_desc; + nmr->nr_rx_slots = mna->up.num_rx_desc; + + /* keep the reference to the parent */ + D("monitor ok"); + + return 0; + +release_out: + D("monitor error"); + for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) { + if (pna->tx_rings[i].monitor == mna) + pna->tx_rings[i].monitor = NULL; + } + for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) { + if (pna->rx_rings[i].monitor == mna) + pna->rx_rings[i].monitor = NULL; + } +put_out: + netmap_adapter_put(pna); + free(mna, M_DEVBUF); + return error; +} + + +#endif /* WITH_MONITOR */ |