 sys/dev/netmap/netmap.c | 209 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 179 insertions(+), 30 deletions(-)
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index 63b766b..35d5303 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -120,10 +120,12 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
int netmap_drop = 0; /* debugging */
int netmap_flags = 0; /* debug flags */
+int netmap_fwd = 0; /* force transparent mode */
int netmap_copy = 0; /* debugging, copy content */
SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, copy, CTLFLAG_RW, &netmap_copy, 0 , "");
#ifdef NM_BRIDGE /* support for netmap bridge */
@@ -647,63 +649,170 @@ netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
/*
* Handlers for synchronization of the queues from/to the host.
- *
- * netmap_sync_to_host() passes packets up. We are called from a
- * system call in user process context, and the only contention
- * can be among multiple user threads erroneously calling
- * this routine concurrently. In principle we should not even
- * need to lock.
+ * Netmap has two operating modes:
+ * - in the default mode, the rings connected to the host stack are
+ * just another ring pair managed by userspace;
+ * - in transparent mode (XXX to be defined) incoming packets
+ * (from the host or the NIC) are marked as NS_FORWARD upon
+ * arrival, and the user application has a chance to reset the
+ * flag for packets that should be dropped.
+ * On the RXSYNC or poll(), packets in RX rings between
+ * kring->nr_hwcur and ring->cur with NS_FORWARD still set are moved
+ * to the other side.
+ * The transfer NIC --> host is relatively easy: we just encapsulate
+ * the buffers into mbufs and we are done. The host --> NIC side is
+ * slightly harder because there might not be room in the tx ring,
+ * so the buffer might not be released for a while.
+ */
+
+/*
+ * pass a chain of buffers to the host stack as coming from 'dst'
*/
static void
-netmap_sync_to_host(struct netmap_adapter *na)
+netmap_send_up(struct ifnet *dst, struct mbuf *head)
{
- struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
- struct netmap_ring *ring = kring->ring;
- struct mbuf *head = NULL, *tail = NULL, *m;
- u_int k, n, lim = kring->nkr_num_slots - 1;
+ struct mbuf *m;
- k = ring->cur;
- if (k > lim) {
- netmap_ring_reinit(kring);
- return;
+ /* send packets up, outside the lock */
+ while ((m = head) != NULL) {
+ head = head->m_nextpkt;
+ m->m_nextpkt = NULL;
+ if (netmap_verbose & NM_VERB_HOST)
+ D("sending up pkt %p size %d", m, MBUF_LEN(m));
+ NM_SEND_UP(dst, m);
}
- // na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
+}
- /* Take packets from hwcur to cur and pass them up.
+struct mbq {
+ struct mbuf *head;
+ struct mbuf *tail;
+ int count;
+};
+
+/*
+ * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
+ * Run from hwcur to cur - reserved
+ */
+static void
+netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
+{
+ /* Take packets from hwcur to cur-reserved and pass them up.
* In case of no buffers we give up. At the end of the loop,
* the queue is drained in all cases.
+ * XXX handle reserved
*/
+ int k = kring->ring->cur - kring->ring->reserved;
+ u_int n, lim = kring->nkr_num_slots - 1;
+ struct mbuf *m, *tail = q->tail;
+
+ if (k < 0)
+ k = k + kring->nkr_num_slots;
for (n = kring->nr_hwcur; n != k;) {
- struct netmap_slot *slot = &ring->slot[n];
+ struct netmap_slot *slot = &kring->ring->slot[n];
n = (n == lim) ? 0 : n + 1;
+ if ((slot->flags & NS_FORWARD) == 0 && !force)
+ continue;
if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE) {
D("bad pkt at %d len %d", n, slot->len);
continue;
}
- m = m_devget(NMB(slot), slot->len, 0, na->ifp, NULL);
+ slot->flags &= ~NS_FORWARD; // XXX needed ?
+ m = m_devget(NMB(slot), slot->len, 0, kring->na->ifp, NULL);
if (m == NULL)
break;
if (tail)
tail->m_nextpkt = m;
else
- head = m;
+ q->head = m;
tail = m;
+ q->count++;
m->m_nextpkt = NULL;
}
+ q->tail = tail;
+}
+
+/*
+ * called under main lock to send packets from the host to the NIC
+ * The host ring has packets from nr_hwcur to (cur - reserved)
+ * to be sent down. We scan the tx rings, which have just been
+ * flushed so nr_hwcur == cur. Pushing packets down means
+ * incrementing cur and decrementing avail.
+ * XXX to be verified
+ */
+static void
+netmap_sw_to_nic(struct netmap_adapter *na)
+{
+ struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
+ struct netmap_kring *k1 = &na->tx_rings[0];
+ int i, howmany, src_lim, dst_lim;
+
+ howmany = kring->nr_hwavail; /* XXX otherwise cur - reserved - nr_hwcur */
+
+ src_lim = kring->nkr_num_slots;
+ for (i = 0; howmany > 0 && i < na->num_tx_rings; i++) {
+ ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail);
+ dst_lim = k1->nkr_num_slots;
+ while (howmany > 0 && k1->ring->avail > 0) {
+ struct netmap_slot *src, *dst, tmp;
+ src = &kring->ring->slot[kring->nr_hwcur];
+ dst = &k1->ring->slot[k1->ring->cur];
+ tmp = *src;
+ src->buf_idx = dst->buf_idx;
+ src->flags = NS_BUF_CHANGED;
+
+ dst->buf_idx = tmp.buf_idx;
+ dst->len = tmp.len;
+ dst->flags = NS_BUF_CHANGED;
+ ND("out len %d buf %d from %d to %d",
+ dst->len, dst->buf_idx,
+ kring->nr_hwcur, k1->ring->cur);
+
+ if (++kring->nr_hwcur >= src_lim)
+ kring->nr_hwcur = 0;
+ howmany--;
+ kring->nr_hwavail--;
+ if (++k1->ring->cur >= dst_lim)
+ k1->ring->cur = 0;
+ k1->ring->avail--;
+ }
+ kring->ring->cur = kring->nr_hwcur; // XXX
+ k1++;
+ }
+}
+
+/*
+ * netmap_sync_to_host() passes packets up. We are called from a
+ * system call in user process context, and the only contention
+ * can be among multiple user threads erroneously calling
+ * this routine concurrently.
+ */
+static void
+netmap_sync_to_host(struct netmap_adapter *na)
+{
+ struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
+ struct netmap_ring *ring = kring->ring;
+ u_int k, lim = kring->nkr_num_slots - 1;
+ struct mbq q = { NULL, NULL, 0 };
+
+ k = ring->cur;
+ if (k > lim) {
+ netmap_ring_reinit(kring);
+ return;
+ }
+ // na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
+
+ /* Take packets from hwcur to cur and pass them up.
+ * In case of no buffers we give up. At the end of the loop,
+ * the queue is drained in all cases.
+ */
+ netmap_grab_packets(kring, &q, 1);
kring->nr_hwcur = k;
kring->nr_hwavail = ring->avail = lim;
// na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0);
- /* send packets up, outside the lock */
- while ((m = head) != NULL) {
- head = head->m_nextpkt;
- m->m_nextpkt = NULL;
- if (netmap_verbose & NM_VERB_HOST)
- D("sending up pkt %p size %d", m, MBUF_LEN(m));
- NM_SEND_UP(na->ifp, m);
- }
+ netmap_send_up(na->ifp, q.head);
}
/*
@@ -1236,7 +1345,8 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
struct ifnet *ifp;
struct netmap_kring *kring;
u_int core_lock, i, check_all, want_tx, want_rx, revents = 0;
- u_int lim_tx, lim_rx;
+ u_int lim_tx, lim_rx, host_forwarded = 0;
+ struct mbq q = { NULL, NULL, 0 };
enum {NO_CL, NEED_CL, LOCKED_CL }; /* see below */
void *pwait = dev; /* linux compatibility */
@@ -1284,6 +1394,17 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
return (revents);
}
+ /* if we are in transparent mode, also check the host rx ring */
+ kring = &na->rx_rings[lim_rx];
+ if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
+ && want_rx
+ && (netmap_fwd || kring->ring->flags & NR_FORWARD) ) {
+ if (kring->ring->avail == 0)
+ netmap_sync_from_host(na, td, dev);
+ if (kring->ring->avail > 0)
+ revents |= want_rx;
+ }
+
/*
* check_all is set if the card has more than one queue and
* the client is polling all of them. If true, we sleep on
@@ -1359,6 +1480,7 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
 * so that the tx rings do not stall).
*/
if (priv->np_txpoll || want_tx) {
+flush_tx:
for (i = priv->np_qfirst; i < lim_tx; i++) {
kring = &na->tx_rings[i];
/*
@@ -1411,6 +1533,11 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
}
if (na->separate_locks)
na->nm_lock(ifp, NETMAP_RX_LOCK, i);
+ if (netmap_fwd || kring->ring->flags & NR_FORWARD) {
+ ND(10, "forwarding some buffers up %d to %d",
+ kring->nr_hwcur, kring->ring->cur);
+ netmap_grab_packets(kring, &q, netmap_fwd);
+ }
if (na->nm_rxsync(ifp, i, 0 /* no lock */))
revents |= POLLERR;
@@ -1433,8 +1560,28 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
if (want_rx)
selrecord(td, &na->rx_si);
}
+
+ /* forward packets from the host ring to the NIC tx rings */
+ kring = &na->rx_rings[lim_rx];
+ if (kring->nr_hwavail > 0)
+ ND("host rx %d has %d packets", lim_rx, kring->nr_hwavail);
+ if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
+ && (netmap_fwd || kring->ring->flags & NR_FORWARD)
+ && kring->nr_hwavail > 0 && !host_forwarded) {
+ if (core_lock == NEED_CL) {
+ na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
+ core_lock = LOCKED_CL;
+ }
+ netmap_sw_to_nic(na);
+ host_forwarded = 1; /* prevent another pass */
+ want_rx = 0;
+ goto flush_tx;
+ }
+
if (core_lock == LOCKED_CL)
na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
+ if (q.head)
+ netmap_send_up(na->ifp, q.head);
return (revents);
}
@@ -1586,7 +1733,8 @@ netmap_start(struct ifnet *ifp, struct mbuf *m)
goto done; /* no space */
}
if (len > NETMAP_BUF_SIZE) {
- D("drop packet size %d > %d", len, NETMAP_BUF_SIZE);
+ D("%s from_host, drop packet size %d > %d", ifp->if_xname,
+ len, NETMAP_BUF_SIZE);
goto done; /* too long for us */
}
@@ -1597,6 +1745,7 @@ netmap_start(struct ifnet *ifp, struct mbuf *m)
slot = &kring->ring->slot[i];
m_copydata(m, 0, len, NMB(slot));
slot->len = len;
+ slot->flags = kring->nkr_slot_flags;
kring->nr_hwavail++;
if (netmap_verbose & NM_VERB_HOST)
D("wake up host ring %s %d", na->ifp->if_xname, na->num_rx_rings);