author     luigi <luigi@FreeBSD.org>    2014-06-09 15:24:45 +0000
committer  luigi <luigi@FreeBSD.org>    2014-06-09 15:24:45 +0000
commit     ebbf6b80b7c61883bee08d58e7291d274b4201a0 (patch)
tree       a448df913c38e0849c39af93fd624da3bf845225 /sys/dev/netmap
parent     2472187c4f707df7b460212773ffae872c03d247 (diff)
sync netmap code with the version in HEAD:
- fix handling of tx mbufs in emulated netmap mode
- introduce mbq_lock() and mbq_unlock()
- rate limit some error messages
- many whitespace and comment fixes
Diffstat (limited to 'sys/dev/netmap')
-rw-r--r--  sys/dev/netmap/netmap.c          132
-rw-r--r--  sys/dev/netmap/netmap_freebsd.c   71
-rw-r--r--  sys/dev/netmap/netmap_generic.c   60
-rw-r--r--  sys/dev/netmap/netmap_kern.h      21
-rw-r--r--  sys/dev/netmap/netmap_mbq.c        8
-rw-r--r--  sys/dev/netmap/netmap_mbq.h       12
-rw-r--r--  sys/dev/netmap/netmap_mem2.c       2
-rw-r--r--  sys/dev/netmap/netmap_pipe.c      14
-rw-r--r--  sys/dev/netmap/netmap_vale.c      18
9 files changed, 257 insertions, 81 deletions
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index 6fd8028..274a7d7 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -270,6 +270,7 @@ netmap_disable_ring(struct netmap_kring *kr)
}
+/* stop or enable all the rings of na */
static void
netmap_set_all_rings(struct ifnet *ifp, int stopped)
{
@@ -303,6 +304,13 @@ netmap_set_all_rings(struct ifnet *ifp, int stopped)
}
+/*
+ * Convenience function used in drivers. Waits for current txsync()s/rxsync()s
+ * to finish and prevents any new one from starting. Call this before turning
+ * netmap mode off, or before removing the hardware rings (e.g., on module
+ * unload). As a rule of thumb for linux drivers, this should be placed near
+ * each napi_disable().
+ */
void
netmap_disable_all_rings(struct ifnet *ifp)
{
@@ -310,6 +318,11 @@ netmap_disable_all_rings(struct ifnet *ifp)
}
+/*
+ * Convenience function used in drivers. Re-enables rxsync and txsync on the
+ * adapter's rings. In linux drivers, this should be placed near each
+ * napi_enable().
+ */
void
netmap_enable_all_rings(struct ifnet *ifp)
{
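
As an aside, the intended use of the two helpers above in a FreeBSD NIC driver is to bracket a hardware reconfiguration. The sketch below is illustrative only; the foo_* softc and functions are hypothetical and not part of this commit:

	static void
	foo_reinit(struct foo_softc *sc)
	{
		struct ifnet *ifp = sc->ifp;

		netmap_disable_all_rings(ifp);	/* wait for running syncs, block new ones */
		foo_stop_hw(sc);		/* hypothetical: quiesce the NIC */
		foo_init_hw(sc);		/* hypothetical: reprogram rings and restart */
		netmap_enable_all_rings(ifp);	/* allow txsync()/rxsync() again */
	}
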
@@ -393,6 +406,7 @@ nm_dump_buf(char *p, int len, int lim, char *dst)
* Fetch configuration from the device, to cope with dynamic
* reconfigurations after loading the module.
*/
+/* call with NMG_LOCK held */
int
netmap_update_config(struct netmap_adapter *na)
{
@@ -447,18 +461,20 @@ netmap_rxsync_compat(struct netmap_kring *kring, int flags)
return na->nm_rxsync(na, kring->ring_id, flags);
}
+/* kring->nm_sync callback for the host tx ring */
static int
netmap_txsync_to_host_compat(struct netmap_kring *kring, int flags)
{
- (void)flags;
+ (void)flags; /* unused */
netmap_txsync_to_host(kring->na);
return 0;
}
+/* kring->nm_sync callback for the host rx ring */
static int
netmap_rxsync_from_host_compat(struct netmap_kring *kring, int flags)
{
- (void)flags;
+ (void)flags; /* unused */
netmap_rxsync_from_host(kring->na, NULL, NULL);
return 0;
}
@@ -489,6 +505,7 @@ netmap_rxsync_from_host_compat(struct netmap_kring *kring, int flags)
* Note: for compatibility, host krings are created even when not needed.
* The tailroom space is currently used by vale ports for allocating leases.
*/
+/* call with NMG_LOCK held */
int
netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
{
@@ -567,6 +584,7 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
/* undo the actions performed by netmap_krings_create */
+/* call with NMG_LOCK held */
void
netmap_krings_delete(struct netmap_adapter *na)
{
@@ -586,6 +604,7 @@ netmap_krings_delete(struct netmap_adapter *na)
* on the rings connected to the host so we need to purge
* them first.
*/
+/* call with NMG_LOCK held */
static void
netmap_hw_krings_delete(struct netmap_adapter *na)
{
@@ -598,6 +617,12 @@ netmap_hw_krings_delete(struct netmap_adapter *na)
}
+/* create a new netmap_if for a newly registered fd.
+ * If this is the first registration of the adapter,
+ * also create the netmap rings and their in-kernel view,
+ * the netmap krings.
+ */
+/* call with NMG_LOCK held */
static struct netmap_if*
netmap_if_new(const char *ifname, struct netmap_adapter *na)
{
@@ -608,17 +633,23 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na)
return NULL;
}
- if (na->active_fds)
+ if (na->active_fds) /* already registered */
goto final;
+ /* create and init the krings arrays.
+ * Depending on the adapter, this may also create
+ * the netmap rings themselves
+ */
if (na->nm_krings_create(na))
goto cleanup;
+ /* create all missing netmap rings */
if (netmap_mem_rings_create(na))
goto cleanup;
final:
+ /* in all cases, create a new netmap if */
nifp = netmap_mem_if_new(ifname, na);
if (nifp == NULL)
goto cleanup;
@@ -638,8 +669,8 @@ cleanup:
/* grab a reference to the memory allocator, if we don't have one already. The
* reference is taken from the netmap_adapter registered with the priv.
- *
*/
+/* call with NMG_LOCK held */
static int
netmap_get_memory_locked(struct netmap_priv_d* p)
{
@@ -672,6 +703,7 @@ netmap_get_memory_locked(struct netmap_priv_d* p)
}
+/* call with NMG_LOCK *not* held */
int
netmap_get_memory(struct netmap_priv_d* p)
{
@@ -683,6 +715,7 @@ netmap_get_memory(struct netmap_priv_d* p)
}
+/* call with NMG_LOCK held */
static int
netmap_have_memory_locked(struct netmap_priv_d* p)
{
@@ -690,6 +723,7 @@ netmap_have_memory_locked(struct netmap_priv_d* p)
}
+/* call with NMG_LOCK held */
static void
netmap_drop_memory_locked(struct netmap_priv_d* p)
{
@@ -755,6 +789,7 @@ netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
netmap_mem_if_delete(na, nifp);
}
+/* call with NMG_LOCK held */
static __inline int
nm_tx_si_user(struct netmap_priv_d *priv)
{
@@ -762,6 +797,7 @@ nm_tx_si_user(struct netmap_priv_d *priv)
(priv->np_txqlast - priv->np_txqfirst > 1));
}
+/* call with NMG_LOCK held */
static __inline int
nm_rx_si_user(struct netmap_priv_d *priv)
{
@@ -771,8 +807,12 @@ nm_rx_si_user(struct netmap_priv_d *priv)
/*
+ * Destructor of the netmap_priv_d, called when the fd has
+ * no active open() and mmap(). Also called in error paths.
+ *
* returns 1 if this is the last instance and we can free priv
*/
+/* call with NMG_LOCK held */
int
netmap_dtor_locked(struct netmap_priv_d *priv)
{
@@ -805,6 +845,7 @@ netmap_dtor_locked(struct netmap_priv_d *priv)
}
+/* call with NMG_LOCK *not* held */
void
netmap_dtor(void *data)
{
@@ -1009,7 +1050,7 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai
(void)pwait; /* disable unused warnings */
(void)td;
- mtx_lock(&q->lock);
+ mbq_lock(q);
/* First part: import newly received packets */
n = mbq_len(q);
@@ -1019,7 +1060,7 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai
nm_i = kring->nr_hwtail;
stop_i = nm_prev(nm_i, lim);
- while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
+ while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
int len = MBUF_LEN(m);
struct netmap_slot *slot = &ring->slot[nm_i];
@@ -1051,7 +1092,7 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai
if (kring->rcur == kring->rtail && td) /* no bufs available */
selrecord(td, &kring->si);
- mtx_unlock(&q->lock);
+ mbq_unlock(q);
return ret;
}
@@ -1194,6 +1235,12 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
if (*na != NULL) /* valid match in netmap_get_bdg_na() */
goto pipes;
+ /*
+ * This must be a hardware na, lookup the name in the system.
+ * Note that by hardware we actually mean "it shows up in ifconfig".
+ * This may still be a tap, a veth/epair, or even a
+ * persistent VALE port.
+ */
ifp = ifunit_ref(nmr->nr_name);
if (ifp == NULL) {
return ENXIO;
@@ -1212,6 +1259,11 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
netmap_adapter_get(ret);
pipes:
+ /*
+ * If we are opening a pipe whose parent was not in netmap mode,
+ * we have to allocate the pipe array now.
+ * XXX get rid of this clumsiness (2014-03-15)
+ */
error = netmap_pipe_alloc(*na, nmr);
out:
@@ -1219,7 +1271,7 @@ out:
netmap_adapter_put(ret);
if (ifp)
- if_rele(ifp);
+ if_rele(ifp); /* allow live unloading of drivers modules */
return error;
}
@@ -1515,7 +1567,7 @@ netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
if (nm_rx_si_user(priv))
na->rx_si_users++;
if (netmap_verbose) {
- D("%s: tx [%d,%d) rx [%d,%d) id %d",
+ D("%s: tx [%d,%d) rx [%d,%d) id %d",
NM_IFPNAME(na->ifp),
priv->np_txqfirst,
priv->np_txqlast,
@@ -1555,10 +1607,9 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
goto out;
}
nifp = netmap_if_new(NM_IFPNAME(ifp), na);
+
+ /* Allocate a netmap_if and, if necessary, all the netmap_ring's */
if (nifp == NULL) { /* allocation failed */
- /* we should drop the allocator, but only
- * if we were the ones who grabbed it
- */
error = ENOMEM;
goto out;
}
@@ -1568,10 +1619,8 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
} else {
/* Otherwise set the card in netmap mode
* and make it use the shared buffers.
- *
- * do not core lock because the race is harmless here,
- * there cannot be any traffic to netmap_transmit()
*/
+ /* cache the allocator info in the na */
na->na_lut = na->nm_mem->pools[NETMAP_BUF_POOL].lut;
ND("%p->na_lut == %p", na, na->na_lut);
na->na_lut_objtotal = na->nm_mem->pools[NETMAP_BUF_POOL].objtotal;
@@ -1585,6 +1634,9 @@ out:
*err = error;
if (error) {
priv->np_na = NULL;
+ /* we should drop the allocator, but only
+ * if we were the ones who grabbed it
+ */
if (need_mem)
netmap_drop_memory_locked(priv);
}
@@ -2008,6 +2060,12 @@ flush_tx:
continue;
/* only one thread does txsync */
if (nm_kr_tryget(kring)) {
+ /* either busy or stopped
+ * XXX if the ring is stopped, sleeping would
+ * be better. In current code, however, we only
+ * stop the rings for brief intervals (2014-03-14)
+ */
+
if (netmap_verbose)
RD(2, "%p lost race on txring %d, ok",
priv, i);
@@ -2049,7 +2107,7 @@ flush_tx:
*/
if (want_rx) {
int send_down = 0; /* transparent mode */
- /* two rounds here to for race avoidance */
+ /* two rounds here for race avoidance */
do_retry_rx:
for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
int found = 0;
@@ -2120,7 +2178,7 @@ do_retry_rx:
* Transparent mode: marked bufs on rx rings between
* kring->nr_hwcur and ring->head
* are passed to the other endpoint.
- *
+ *
* In this mode we also scan the sw rxring, which in
* turn passes packets up.
*
@@ -2139,6 +2197,7 @@ do_retry_rx:
static int netmap_hw_krings_create(struct netmap_adapter *);
+/* default notify callback */
static int
netmap_notify(struct netmap_adapter *na, u_int n_ring,
enum txrx tx, int flags)
@@ -2148,11 +2207,16 @@ netmap_notify(struct netmap_adapter *na, u_int n_ring,
if (tx == NR_TX) {
kring = na->tx_rings + n_ring;
OS_selwakeup(&kring->si, PI_NET);
+ /* optimization: avoid a wake up on the global
+ * queue if nobody has registered for more
+ * than one ring
+ */
if (na->tx_si_users > 0)
OS_selwakeup(&na->tx_si, PI_NET);
} else {
kring = na->rx_rings + n_ring;
OS_selwakeup(&kring->si, PI_NET);
+ /* optimization: same as above */
if (na->rx_si_users > 0)
OS_selwakeup(&na->rx_si, PI_NET);
}
@@ -2160,7 +2224,11 @@ netmap_notify(struct netmap_adapter *na, u_int n_ring,
}
-// XXX check handling of failures
+/* called by all routines that create netmap_adapters.
+ * Attach na to the ifp (if any) and provide defaults
+ * for optional callbacks. Defaults assume that we
+ * are creating a hardware netmap_adapter.
+ */
int
netmap_attach_common(struct netmap_adapter *na)
{
@@ -2182,6 +2250,10 @@ netmap_attach_common(struct netmap_adapter *na)
NETMAP_SET_CAPABLE(ifp);
if (na->nm_krings_create == NULL) {
+ /* we assume that we have been called by a driver,
+ * since other port types all provide their own
+ * nm_krings_create
+ */
na->nm_krings_create = netmap_hw_krings_create;
na->nm_krings_delete = netmap_hw_krings_delete;
}
@@ -2195,10 +2267,11 @@ netmap_attach_common(struct netmap_adapter *na)
}
+/* standard cleanup, called by all destructors */
void
netmap_detach_common(struct netmap_adapter *na)
{
- if (na->ifp)
+ if (na->ifp != NULL)
WNA(na->ifp) = NULL; /* XXX do we need this? */
if (na->tx_rings) { /* XXX should not happen */
@@ -2255,12 +2328,17 @@ netmap_attach(struct netmap_adapter *arg)
hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
#endif /* linux */
- D("success for %s", NM_IFPNAME(ifp));
+ D("success for %s tx %d/%d rx %d/%d queues/slots",
+ NM_IFPNAME(ifp),
+ hwna->up.num_tx_rings, hwna->up.num_tx_desc,
+ hwna->up.num_rx_rings, hwna->up.num_rx_desc
+ );
return 0;
fail:
D("fail, arg %p ifp %p na %p", arg, ifp, hwna);
- netmap_detach(ifp);
+ if (ifp)
+ netmap_detach(ifp);
return (hwna ? EINVAL : ENOMEM);
}
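
For context, netmap_attach() (whose success message now also reports ring and slot counts) is what a native driver calls at attach time, after which netmap_attach_common() fills in the defaults described above. A minimal, hypothetical caller could look like this sketch; the foo_* names are illustrative, not part of this commit:

	static void
	foo_netmap_attach(struct foo_softc *sc)
	{
		struct netmap_adapter na;

		bzero(&na, sizeof(na));
		na.ifp = sc->ifp;
		na.num_tx_desc = sc->num_tx_desc;
		na.num_rx_desc = sc->num_rx_desc;
		na.num_tx_rings = na.num_rx_rings = sc->num_queues;
		na.nm_register = foo_netmap_reg;	/* driver-supplied callbacks */
		na.nm_txsync = foo_netmap_txsync;
		na.nm_rxsync = foo_netmap_rxsync;
		netmap_attach(&na);	/* copies na and prints the message above */
	}
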
@@ -2294,6 +2372,7 @@ NM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
return 1;
}
+/* nm_krings_create callback for all hardware native adapters */
int
netmap_hw_krings_create(struct netmap_adapter *na)
{
@@ -2309,8 +2388,7 @@ netmap_hw_krings_create(struct netmap_adapter *na)
/*
- * Free the allocated memory linked to the given ``netmap_adapter``
- * object.
+ * Called on module unload by the netmap-enabled drivers
*/
void
netmap_detach(struct ifnet *ifp)
@@ -2381,7 +2459,7 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
* not possible on Linux).
* Also avoid overflowing the queue.
*/
- mtx_lock(&q->lock);
+ mbq_lock(q);
space = kring->nr_hwtail - kring->nr_hwcur;
if (space < 0)
@@ -2398,13 +2476,17 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
m = NULL;
error = 0;
}
- mtx_unlock(&q->lock);
+ mbq_unlock(q);
done:
if (m)
m_freem(m);
/* unconditionally wake up listeners */
na->nm_notify(na, na->num_rx_rings, NR_RX, 0);
+ /* this is normally netmap_notify(), but for nics
+ * connected to a bridge it is netmap_bwrap_intr_notify(),
+ * that possibly forwards the frames through the switch
+ */
return (error);
}
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
index 6572ca1..e43d669 100644
--- a/sys/dev/netmap/netmap_freebsd.c
+++ b/sys/dev/netmap/netmap_freebsd.c
@@ -61,7 +61,8 @@
/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
-rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
+rawsum_t
+nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
{
/* TODO XXX please use the FreeBSD implementation for this. */
uint16_t *words = (uint16_t *)data;
@@ -80,7 +81,8 @@ rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
/* Fold a raw checksum: 'cur_sum' is in host byte order, while the
* return value is in network byte order.
*/
-uint16_t nm_csum_fold(rawsum_t cur_sum)
+uint16_t
+nm_csum_fold(rawsum_t cur_sum)
{
/* TODO XXX please use the FreeBSD implementation for this. */
while (cur_sum >> 16)
@@ -89,7 +91,8 @@ uint16_t nm_csum_fold(rawsum_t cur_sum)
return htobe16((~cur_sum) & 0xFFFF);
}
-uint16_t nm_csum_ipv4(struct nm_iphdr *iph)
+uint16_t
+nm_csum_ipv4(struct nm_iphdr *iph)
{
#if 0
return in_cksum_hdr((void *)iph);
@@ -98,7 +101,8 @@ uint16_t nm_csum_ipv4(struct nm_iphdr *iph)
#endif
}
-void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
+void
+nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
size_t datalen, uint16_t *check)
{
#ifdef INET
@@ -120,7 +124,8 @@ void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
#endif
}
-void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
+void
+nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
size_t datalen, uint16_t *check)
{
#ifdef INET6
@@ -143,7 +148,8 @@ void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
int
netmap_catch_rx(struct netmap_adapter *na, int intercept)
{
- struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+ struct netmap_generic_adapter *gna =
+ (struct netmap_generic_adapter *)na;
struct ifnet *ifp = na->ifp;
if (intercept) {
@@ -209,11 +215,29 @@ generic_xmit_frame(struct ifnet *ifp, struct mbuf *m,
{
int ret;
- m->m_len = m->m_pkthdr.len = 0;
+ /*
+ * The mbuf should be a cluster from our special pool,
+ * so we do not need to do an m_copyback but just copy
+ * (and possibly just reference the netmap buffer)
+ */
- // copy data to the mbuf
- m_copyback(m, 0, len, addr);
- // inc refcount. We are alone, so we can skip the atomic
+ if (*m->m_ext.ref_cnt != 1) {
+ D("invalid refcnt %d for %p",
+ *m->m_ext.ref_cnt, m);
+ panic("in generic_xmit_frame");
+ }
+ // XXX the ext_size check is unnecessary if we link the netmap buf
+ if (m->m_ext.ext_size < len) {
+ RD(5, "size %d < len %d", m->m_ext.ext_size, len);
+ len = m->m_ext.ext_size;
+ }
+ if (1) { /* XXX seems to have negligible benefits */
+ m->m_ext.ext_buf = m->m_data = addr;
+ } else {
+ bcopy(addr, m->m_data, len);
+ }
+ m->m_len = m->m_pkthdr.len = len;
+ // inc refcount. All ours, we could skip the atomic
atomic_fetchadd_int(m->m_ext.ref_cnt, 1);
m->m_flags |= M_FLOWID;
m->m_pkthdr.flowid = ring_nr;
@@ -223,6 +247,14 @@ generic_xmit_frame(struct ifnet *ifp, struct mbuf *m,
}
+#if __FreeBSD_version >= 1100005
+struct netmap_adapter *
+netmap_getna(if_t ifp)
+{
+ return (NA((struct ifnet *)ifp));
+}
+#endif /* __FreeBSD_version >= 1100005 */
+
/*
* The following two functions are empty until we have a generic
* way to extract the info from the ifp
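
The new netmap_getna() accessor (FreeBSD 11 and later) simply maps an if_t back to its netmap_adapter; a hypothetical consumer would use it roughly as follows:

	struct netmap_adapter *na = netmap_getna(ifp);

	if (na != NULL)
		printf("%s: netmap adapter %p, %u tx rings\n",
		    __func__, na, na->num_tx_rings);
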
@@ -230,7 +262,7 @@ generic_xmit_frame(struct ifnet *ifp, struct mbuf *m,
int
generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
{
- D("called");
+ D("called, in tx %d rx %d", *tx, *rx);
return 0;
}
@@ -238,13 +270,14 @@ generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
void
generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
{
- D("called");
+ D("called, in txq %d rxq %d", *txq, *rxq);
*txq = netmap_generic_rings;
*rxq = netmap_generic_rings;
}
-void netmap_mitigation_init(struct nm_generic_mit *mit, struct netmap_adapter *na)
+void
+netmap_mitigation_init(struct nm_generic_mit *mit, struct netmap_adapter *na)
{
ND("called");
mit->mit_pending = 0;
@@ -252,26 +285,30 @@ void netmap_mitigation_init(struct nm_generic_mit *mit, struct netmap_adapter *n
}
-void netmap_mitigation_start(struct nm_generic_mit *mit)
+void
+netmap_mitigation_start(struct nm_generic_mit *mit)
{
ND("called");
}
-void netmap_mitigation_restart(struct nm_generic_mit *mit)
+void
+netmap_mitigation_restart(struct nm_generic_mit *mit)
{
ND("called");
}
-int netmap_mitigation_active(struct nm_generic_mit *mit)
+int
+netmap_mitigation_active(struct nm_generic_mit *mit)
{
ND("called");
return 0;
}
-void netmap_mitigation_cleanup(struct nm_generic_mit *mit)
+void
+netmap_mitigation_cleanup(struct nm_generic_mit *mit)
{
ND("called");
}
diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c
index 63253b6..ef8a8f3 100644
--- a/sys/dev/netmap/netmap_generic.c
+++ b/sys/dev/netmap/netmap_generic.c
@@ -81,20 +81,26 @@ __FBSDID("$FreeBSD$");
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
-#define rtnl_lock() D("rtnl_lock called");
-#define rtnl_unlock() D("rtnl_unlock called");
+#define rtnl_lock() ND("rtnl_lock called")
+#define rtnl_unlock() ND("rtnl_unlock called")
#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid)
#define MBUF_RXQ(m) ((m)->m_pkthdr.flowid)
#define smp_mb()
/*
- * mbuf wrappers
+ * FreeBSD mbuf allocator/deallocator in emulation mode:
+ *
+ * We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE
+ * so that the destructor, if invoked, will not free the packet.
+ * In principle we should set the destructor only on demand,
+ * but since there might be a race we'd better do it on allocation.
+ * As a consequence, we also need to set the destructor, or we
+ * would leak buffers.
*/
/*
- * we allocate an EXT_PACKET
+ * mbuf wrappers
*/
-#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE)
/* mbuf destructor, also need to change the type to EXT_EXTREF,
* add an M_NOFREE flag, and then clear the flag and
@@ -106,6 +112,32 @@ __FBSDID("$FreeBSD$");
(m)->m_ext.ext_type = EXT_EXTREF; \
} while (0)
+static void
+netmap_default_mbuf_destructor(struct mbuf *m)
+{
+ /* restore original mbuf */
+ m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1;
+ m->m_ext.ext_arg1 = NULL;
+ m->m_ext.ext_type = EXT_PACKET;
+ m->m_ext.ext_free = NULL;
+ if (*(m->m_ext.ref_cnt) == 0)
+ *(m->m_ext.ref_cnt) = 1;
+ uma_zfree(zone_pack, m);
+}
+
+static inline struct mbuf *
+netmap_get_mbuf(int len)
+{
+ struct mbuf *m;
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR | M_NOFREE);
+ if (m) {
+ m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save
+ m->m_ext.ext_free = (void *)netmap_default_mbuf_destructor;
+ m->m_ext.ext_type = EXT_EXTREF;
+ ND(5, "create m %p refcnt %d", m, *m->m_ext.ref_cnt);
+ }
+ return m;
+}
#define GET_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)
@@ -223,7 +255,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
#endif /* REG_RESET */
if (enable) { /* Enable netmap mode. */
- /* Init the mitigation support. */
+ /* Init the mitigation support on all the rx queues. */
gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
M_DEVBUF, M_NOWAIT | M_ZERO);
if (!gna->mit) {
@@ -373,15 +405,11 @@ out:
static void
generic_mbuf_destructor(struct mbuf *m)
{
- if (netmap_verbose)
- D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
#ifdef __FreeBSD__
- m->m_ext.ext_type = EXT_PACKET;
- m->m_ext.ext_free = NULL;
- if (*(m->m_ext.ref_cnt) == 0)
- *(m->m_ext.ref_cnt) = 1;
- uma_zfree(zone_pack, m);
+ if (netmap_verbose)
+ RD(5, "Tx irq (%p) queue %d index %d" , m, MBUF_TXQ(m), (int)(uintptr_t)m->m_ext.ext_arg1);
+ netmap_default_mbuf_destructor(m);
#endif /* __FreeBSD__ */
IFRATE(rate_ctx.new.txirq++);
}
@@ -471,12 +499,12 @@ generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
e = generic_tx_event_middle(kring, hwcur);
m = kring->tx_pool[e];
+ ND(5, "Request Event at %d mbuf %p refcnt %d", e, m, m ? GET_MBUF_REFCNT(m) : -2 );
if (m == NULL) {
/* This can happen if there is already an event on the netmap
slot 'e': There is nothing to do. */
return;
}
- ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
kring->tx_pool[e] = NULL;
SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
@@ -770,6 +798,10 @@ generic_netmap_attach(struct ifnet *ifp)
generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
+ if (num_tx_desc == 0 || num_rx_desc == 0) {
+ D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc);
+ return EINVAL;
+ }
gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
if (gna == NULL) {
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index ddcb0e3..087564c 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -62,6 +62,9 @@
#define NM_ATOMIC_TEST_AND_SET(p) (!atomic_cmpset_acq_int((p), 0, 1))
#define NM_ATOMIC_CLEAR(p) atomic_store_rel_int((p), 0)
+#if __FreeBSD_version >= 1100005
+struct netmap_adapter *netmap_getna(if_t ifp);
+#endif
MALLOC_DECLARE(M_NETMAP);
@@ -183,9 +186,6 @@ extern NMG_LOCK_T netmap_global_lock;
* the next empty buffer as known by the hardware (next_to_check or so).
* TX rings: hwcur + hwofs coincides with next_to_send
*
- * Clients cannot issue concurrent syscall on a ring. The system
- * detects this and reports an error using two flags,
- * NKR_WBUSY and NKR_RBUSY
* For received packets, slot->flags is set to nkr_slot_flags
* so we can provide a proper initial value (e.g. set NS_FORWARD
* when operating in 'transparent' mode).
@@ -208,7 +208,7 @@ extern NMG_LOCK_T netmap_global_lock;
* The kring is manipulated by txsync/rxsync and generic netmap function.
*
* Concurrent rxsync or txsync on the same ring are prevented through
- * by nm_kr_lock() which in turn uses nr_busy. This is all we need
+ * by nm_kr_(try)lock() which in turn uses nr_busy. This is all we need
* for NIC rings, and for TX rings attached to the host stack.
*
* RX rings attached to the host stack use an mbq (rx_queue) on both
@@ -440,15 +440,18 @@ struct netmap_adapter {
/*
* nm_dtor() is the cleanup routine called when destroying
* the adapter.
+ * Called with NMG_LOCK held.
*
* nm_register() is called on NIOCREGIF and close() to enter
* or exit netmap mode on the NIC
+ * Called with NMG_LOCK held.
*
* nm_txsync() pushes packets to the underlying hw/switch
*
* nm_rxsync() collects packets from the underlying hw/switch
*
* nm_config() returns configuration information from the OS
+ * Called with NMG_LOCK held.
*
* nm_krings_create() create and init the krings array
* (the array layout must conform to the description
@@ -456,13 +459,12 @@ struct netmap_adapter {
*
* nm_krings_delete() cleanup and delete the kring array
*
- * nm_notify() is used to act after data have become available.
+ * nm_notify() is used to act after data have become available
+ * (or the stopped state of the ring has changed)
* For hw devices this is typically a selwakeup(),
* but for NIC/host ports attached to a switch (or vice-versa)
* we also need to invoke the 'txsync' code downstream.
*/
-
- /* private cleanup */
void (*nm_dtor)(struct netmap_adapter *);
int (*nm_register)(struct netmap_adapter *, int onoff);
@@ -678,7 +680,7 @@ static inline uint32_t
nm_kr_rxspace(struct netmap_kring *k)
{
int space = k->nr_hwtail - k->nr_hwcur;
- if (space < 0)
+ if (space < 0)
space += k->nkr_num_slots;
ND("preserving %d rx slots %d -> %d", space, k->nr_hwcur, k->nr_hwtail);
@@ -827,7 +829,7 @@ nm_txsync_finalize(struct netmap_kring *kring)
{
/* update ring tail to what the kernel knows */
kring->ring->tail = kring->rtail = kring->nr_hwtail;
-
+
/* note, head/rhead/hwcur might be behind cur/rcur
* if no carrier
*/
@@ -1376,5 +1378,4 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
struct netmap_vp_adapter *dst_na,
struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
u_int *j, u_int lim, u_int *howmany);
-
#endif /* _NET_NETMAP_KERN_H_ */
diff --git a/sys/dev/netmap/netmap_mbq.c b/sys/dev/netmap/netmap_mbq.c
index 2606b13..503f5a1 100644
--- a/sys/dev/netmap/netmap_mbq.c
+++ b/sys/dev/netmap/netmap_mbq.c
@@ -76,9 +76,9 @@ static inline void __mbq_enqueue(struct mbq *q, struct mbuf *m)
void mbq_safe_enqueue(struct mbq *q, struct mbuf *m)
{
- mtx_lock(&q->lock);
+ mbq_lock(q);
__mbq_enqueue(q, m);
- mtx_unlock(&q->lock);
+ mbq_unlock(q);
}
@@ -110,9 +110,9 @@ struct mbuf *mbq_safe_dequeue(struct mbq *q)
{
struct mbuf *ret;
- mtx_lock(&q->lock);
+ mbq_lock(q);
ret = __mbq_dequeue(q);
- mtx_unlock(&q->lock);
+ mbq_unlock(q);
return ret;
}
diff --git a/sys/dev/netmap/netmap_mbq.h b/sys/dev/netmap/netmap_mbq.h
index d273d8a..a011c4c 100644
--- a/sys/dev/netmap/netmap_mbq.h
+++ b/sys/dev/netmap/netmap_mbq.h
@@ -62,7 +62,17 @@ void mbq_enqueue(struct mbq *q, struct mbuf *m);
struct mbuf *mbq_dequeue(struct mbq *q);
void mbq_purge(struct mbq *q);
-/* XXX missing mbq_lock() and mbq_unlock */
+static inline void
+mbq_lock(struct mbq *q)
+{
+ mtx_lock_spin(&q->lock);
+}
+
+static inline void
+mbq_unlock(struct mbq *q)
+{
+ mtx_unlock_spin(&q->lock);
+}
void mbq_safe_init(struct mbq *q);
void mbq_safe_destroy(struct mbq *q);
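
The new wrappers are what netmap.c and netmap_mbq.c above now use around the host rx queue; for reference, a caller that drains a queue by hand would pair them with the unlocked primitives like this (a sketch, assuming q was set up with mbq_safe_init()):

	struct mbuf *m;

	mbq_lock(q);				/* take the spin mutex embedded in q */
	while ((m = mbq_dequeue(q)) != NULL)	/* unlocked dequeue, safe under the lock */
		m_freem(m);
	mbq_unlock(q);
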
diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c
index 5491845..d237794 100644
--- a/sys/dev/netmap/netmap_mem2.c
+++ b/sys/dev/netmap/netmap_mem2.c
@@ -992,7 +992,7 @@ netmap_mem_private_new(const char *name, u_int txr, u_int txd,
if (p[NETMAP_RING_POOL].num < v)
p[NETMAP_RING_POOL].num = v;
/* for each pipe we only need the buffers for the 4 "real" rings.
- * On the other end, the pipe ring dimension may be different from
+ * On the other end, the pipe ring dimension may be different from
* the parent port ring dimension. As a compromise, we allocate twice the
* space actually needed if the pipe rings were the same size as the parent rings
*/
diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c
index f8f29fa..9fcc4d2 100644
--- a/sys/dev/netmap/netmap_pipe.c
+++ b/sys/dev/netmap/netmap_pipe.c
@@ -391,7 +391,7 @@ err:
/* netmap_pipe_reg.
*
* There are two cases on registration (onoff==1)
- *
+ *
* 1.a) state is
*
* usr1 --> e1 --> e2
@@ -403,7 +403,7 @@ err:
* usr1 --> e1 --> e2 <-- usr2
*
* and we are e2. Drop the ref e1 is holding.
- *
+ *
* There are two additional cases on unregister (onoff==0)
*
* 2.a) state is
@@ -462,14 +462,14 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
*
* 1) state is
*
- * usr1 --> e1 --> e2
+ * usr1 --> e1 --> e2
*
- * and we are e1 (e2 is not registered, so krings_delete cannot be
+ * and we are e1 (e2 is not registered, so krings_delete cannot be
* called on it);
*
* 2) state is
*
- * usr1 --> e1 e2 <-- usr2
+ * usr1 --> e1 e2 <-- usr2
*
* and we are either e1 or e2.
*
@@ -519,7 +519,7 @@ netmap_pipe_dtor(struct netmap_adapter *na)
pna->peer_ref = 0;
netmap_adapter_put(&pna->peer->up);
}
- if (pna->role == NR_REG_PIPE_MASTER)
+ if (pna->role == NR_REG_PIPE_MASTER)
netmap_pipe_remove(pna->parent, pna);
netmap_adapter_put(pna->parent);
free(na->ifp, M_DEVBUF);
@@ -587,7 +587,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
error = ENODEV;
goto put_out;
}
- /* we create both master and slave.
+ /* we create both master and slave.
* The endpoint we were asked for holds a reference to
* the other one.
*/
diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c
index 34e3912..8e309e9 100644
--- a/sys/dev/netmap/netmap_vale.c
+++ b/sys/dev/netmap/netmap_vale.c
@@ -959,6 +959,14 @@ nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
ft[ft_i].ft_next = NM_FT_NULL;
buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
(void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
+ if (unlikely(buf == NULL)) {
+ RD(5, "NULL %s buffer pointer from %s slot %d len %d",
+ (slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
+ kring->name, j, ft[ft_i].ft_len);
+ buf = ft[ft_i].ft_buf = NMB_VA(0); /* the 'null' buffer */
+ ft[ft_i].ft_len = 0;
+ ft[ft_i].ft_flags = 0;
+ }
__builtin_prefetch(buf);
++ft_i;
if (slot->flags & NS_MOREFRAG) {
@@ -1064,7 +1072,7 @@ netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
uint64_t smac, dmac;
if (buf_len < 14) {
- D("invalid buf length %d", buf_len);
+ RD(5, "invalid buf length %d", buf_len);
return NM_BDG_NOPORT;
}
dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
@@ -1312,6 +1320,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
needed = d->bq_len + brddst->bq_len;
if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
+ RD(3, "virt_hdr_mismatch, src %d len %d", na->virt_hdr_len, dst_na->virt_hdr_len);
/* There is a virtio-net header/offloadings mismatch between
* source and destination. The slower mismatch datapath will
* be used to cope with all the mismatches.
@@ -1412,6 +1421,11 @@ retry:
/* round to a multiple of 64 */
copy_len = (copy_len + 63) & ~63;
+ if (unlikely(copy_len > NETMAP_BUF_SIZE ||
+ dst_len > NETMAP_BUF_SIZE)) {
+ RD(5, "invalid len %d, down to 64", (int)copy_len);
+ copy_len = dst_len = 64; // XXX
+ }
if (ft_p->ft_flags & NS_INDIRECT) {
if (copyin(src, dst, copy_len)) {
// invalid user pointer, pretend len is 0
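(For reference, the (copy_len + 63) & ~63 rounding above leaves a 64-byte frame at 64 bytes but turns a 65-byte frame into a 128-byte copy; the new check then clamps lengths that would overflow a NETMAP_BUF_SIZE destination buffer down to 64 bytes.)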
@@ -1783,7 +1797,7 @@ netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx,
if (is_host_ring) {
vpna = hostna;
ring_nr = 0;
- }
+ }
/* simulate a user wakeup on the rx ring */
/* fetch packets that have arrived.
* XXX maybe do this in a loop ?