summaryrefslogtreecommitdiffstats
path: root/sys/dev/mxge/if_mxge.c
diff options
context:
space:
mode:
authorgallatin <gallatin@FreeBSD.org>2013-02-21 21:28:33 +0000
committergallatin <gallatin@FreeBSD.org>2013-02-21 21:28:33 +0000
commit966be4941492002f86456a9088733904f5559c59 (patch)
treefcbf0037b80b68ce65939bc809992a1d8136674b /sys/dev/mxge/if_mxge.c
parentd9f661c57a95d2cc036dc1b269c8527f65836b4b (diff)
downloadFreeBSD-src-966be4941492002f86456a9088733904f5559c59.zip
FreeBSD-src-966be4941492002f86456a9088733904f5559c59.tar.gz
Improve mxge's receive performance for IPv6:
- Add support for IPv6 rx csum offload - Finally switch mxge from using its own driver lro, to using tcp_lro MFC after: 7 days Sponsored by: Myricom Inc.
Diffstat (limited to 'sys/dev/mxge/if_mxge.c')
-rw-r--r--sys/dev/mxge/if_mxge.c289
1 files changed, 147 insertions, 142 deletions
diff --git a/sys/dev/mxge/if_mxge.c b/sys/dev/mxge/if_mxge.c
index 68afbf9..fcc0825 100644
--- a/sys/dev/mxge/if_mxge.c
+++ b/sys/dev/mxge/if_mxge.c
@@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
+#include <netinet/tcp_lro.h>
#include <netinet6/ip6_var.h>
#include <machine/bus.h>
@@ -102,7 +103,6 @@ static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
-static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
@@ -1311,9 +1311,9 @@ mxge_reset(mxge_softc_t *sc, int interrupts_setup)
ss->tx.stall = 0;
ss->rx_big.cnt = 0;
ss->rx_small.cnt = 0;
- ss->lro_bad_csum = 0;
- ss->lro_queued = 0;
- ss->lro_flushed = 0;
+ ss->lc.lro_bad_csum = 0;
+ ss->lc.lro_queued = 0;
+ ss->lc.lro_flushed = 0;
if (ss->fw_stats != NULL) {
bzero(ss->fw_stats, sizeof *ss->fw_stats);
}
@@ -1414,50 +1414,6 @@ mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
}
static int
-mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
-{
- struct ifnet *ifp;
- int err = 0;
-
- ifp = sc->ifp;
- if (lro_cnt == 0)
- ifp->if_capenable &= ~IFCAP_LRO;
- else
- ifp->if_capenable |= IFCAP_LRO;
- sc->lro_cnt = lro_cnt;
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- mxge_close(sc, 0);
- err = mxge_open(sc);
- }
- return err;
-}
-
-static int
-mxge_change_lro(SYSCTL_HANDLER_ARGS)
-{
- mxge_softc_t *sc;
- unsigned int lro_cnt;
- int err;
-
- sc = arg1;
- lro_cnt = sc->lro_cnt;
- err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
- if (err != 0)
- return err;
-
- if (lro_cnt == sc->lro_cnt)
- return 0;
-
- if (lro_cnt > 128)
- return EINVAL;
-
- mtx_lock(&sc->driver_mtx);
- err = mxge_change_lro_locked(sc, lro_cnt);
- mtx_unlock(&sc->driver_mtx);
- return err;
-}
-
-static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
int err;
@@ -1653,14 +1609,6 @@ mxge_add_sysctls(mxge_softc_t *sc)
CTLFLAG_RW, &mxge_verbose,
0, "verbose printing");
- /* lro */
- SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
- "lro_cnt",
- CTLTYPE_INT|CTLFLAG_RW, sc,
- 0, mxge_change_lro,
- "I", "number of lro merge queues");
-
-
/* add counters exported for debugging from all slices */
sysctl_ctx_init(&sc->slice_sysctl_ctx);
sc->slice_sysctl_tree =
@@ -1686,11 +1634,15 @@ mxge_add_sysctls(mxge_softc_t *sc)
CTLFLAG_RD, &ss->rx_big.cnt,
0, "rx_small_cnt");
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
- "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
+ "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
0, "number of lro merge queues flushed");
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
- "lro_queued", CTLFLAG_RD, &ss->lro_queued,
+ "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
+ 0, "number of bad csums preventing LRO");
+
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
0, "number of frames appended to lro merge"
"queues");
@@ -2534,6 +2486,64 @@ done:
return err;
}
+#ifdef INET6
+
+static uint16_t
+mxge_csum_generic(uint16_t *raw, int len)
+{
+ uint32_t csum;
+
+
+ csum = 0;
+ while (len > 0) {
+ csum += *raw;
+ raw++;
+ len -= 2;
+ }
+ csum = (csum >> 16) + (csum & 0xffff);
+ csum = (csum >> 16) + (csum & 0xffff);
+ return (uint16_t)csum;
+}
+
+static inline uint16_t
+mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
+{
+ uint32_t partial;
+ int nxt, cksum_offset;
+ struct ip6_hdr *ip6 = p;
+ uint16_t c;
+
+ nxt = ip6->ip6_nxt;
+ cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
+ if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
+ cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
+ IPPROTO_IPV6, &nxt);
+ if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
+ return (1);
+ }
+
+ /*
+ * IPv6 headers do not contain a checksum, and hence
+ * do not checksum to zero, so they don't "fall out"
+ * of the partial checksum calculation like IPv4
+ * headers do. We need to fix the partial checksum by
+ * subtracting the checksum of the IPv6 header.
+ */
+
+ partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
+ ETHER_HDR_LEN);
+ csum += ~partial;
+ csum += (csum < ~partial);
+ csum = (csum >> 16) + (csum & 0xFFFF);
+ csum = (csum >> 16) + (csum & 0xFFFF);
+ c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
+ csum);
+
+// printf("%d %d %x %x %x %x %x\n", m->m_pkthdr.len, cksum_offset, c, csum, ocsum, partial, d);
+ c ^= 0xffff;
+ return (c);
+}
+#endif /* INET6 */
/*
* Myri10GE hardware checksums are not valid if the sender
* padded the frame with non-zero padding. This is because
@@ -2547,26 +2557,39 @@ static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
struct ether_header *eh;
+#ifdef INET
struct ip *ip;
- uint16_t c;
+#endif
+ int cap = m->m_pkthdr.rcvif->if_capenable;
+ uint16_t c, etype;
- eh = mtod(m, struct ether_header *);
- /* only deal with IPv4 TCP & UDP for now */
- if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
- return 1;
- ip = (struct ip *)(eh + 1);
- if (__predict_false(ip->ip_p != IPPROTO_TCP &&
- ip->ip_p != IPPROTO_UDP))
- return 1;
+ eh = mtod(m, struct ether_header *);
+ etype = ntohs(eh->ether_type);
+ switch (etype) {
#ifdef INET
- c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htonl(ntohs(csum) + ntohs(ip->ip_len) +
- - (ip->ip_hl << 2) + ip->ip_p));
-#else
- c = 1;
+ case ETHERTYPE_IP:
+ if ((cap & IFCAP_RXCSUM) == 0)
+ return (1);
+ ip = (struct ip *)(eh + 1);
+ if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
+ return (1);
+ c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htonl(ntohs(csum) + ntohs(ip->ip_len) -
+ (ip->ip_hl << 2) + ip->ip_p));
+ c ^= 0xffff;
+ break;
#endif
- c ^= 0xffff;
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ if ((cap & IFCAP_RXCSUM_IPV6) == 0)
+ return (1);
+ c = mxge_rx_csum6((eh + 1), m, csum);
+ break;
+#endif
+ default:
+ c = 1;
+ }
return (c);
}
@@ -2628,7 +2651,8 @@ mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
static inline void
-mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
+mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
+ uint32_t csum, int lro)
{
mxge_softc_t *sc;
struct ifnet *ifp;
@@ -2637,7 +2661,6 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
mxge_rx_ring_t *rx;
bus_dmamap_t old_map;
int idx;
- uint16_t tcpudp_csum;
sc = ss->sc;
ifp = sc->ifp;
@@ -2674,14 +2697,18 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
mxge_vlan_tag_remove(m, &csum);
}
/* if the checksum is valid, mark it in the mbuf header */
- if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
- if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
- return;
- /* otherwise, it was a UDP frame, or a TCP frame which
- we could not do LRO on. Tell the stack that the
- checksum is good */
+
+ if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
+ (0 == mxge_rx_csum(m, csum))) {
+ /* Tell the stack that the checksum is good */
m->m_pkthdr.csum_data = 0xffff;
- m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
+ m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
+ CSUM_DATA_VALID;
+
+#if defined(INET) || defined (INET6)
+ if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
+ return;
+#endif
}
/* flowid only valid if RSS hashing is enabled */
if (sc->num_slices > 1) {
@@ -2693,7 +2720,8 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
}
static inline void
-mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
+mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
+ uint32_t csum, int lro)
{
mxge_softc_t *sc;
struct ifnet *ifp;
@@ -2702,7 +2730,6 @@ mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
mxge_rx_ring_t *rx;
bus_dmamap_t old_map;
int idx;
- uint16_t tcpudp_csum;
sc = ss->sc;
ifp = sc->ifp;
@@ -2739,14 +2766,17 @@ mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
mxge_vlan_tag_remove(m, &csum);
}
/* if the checksum is valid, mark it in the mbuf header */
- if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
- if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
- return;
- /* otherwise, it was a UDP frame, or a TCP frame which
- we could not do LRO on. Tell the stack that the
- checksum is good */
+ if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
+ (0 == mxge_rx_csum(m, csum))) {
+ /* Tell the stack that the checksum is good */
m->m_pkthdr.csum_data = 0xffff;
- m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
+ m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
+ CSUM_DATA_VALID;
+
+#if defined(INET) || defined (INET6)
+ if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
+ return;
+#endif
}
/* flowid only valid if RSS hashing is enabled */
if (sc->num_slices > 1) {
@@ -2764,16 +2794,17 @@ mxge_clean_rx_done(struct mxge_slice_state *ss)
int limit = 0;
uint16_t length;
uint16_t checksum;
+ int lro;
-
+ lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
while (rx_done->entry[rx_done->idx].length != 0) {
length = ntohs(rx_done->entry[rx_done->idx].length);
rx_done->entry[rx_done->idx].length = 0;
checksum = rx_done->entry[rx_done->idx].checksum;
if (length <= (MHLEN - MXGEFW_PAD))
- mxge_rx_done_small(ss, length, checksum);
+ mxge_rx_done_small(ss, length, checksum, lro);
else
- mxge_rx_done_big(ss, length, checksum);
+ mxge_rx_done_big(ss, length, checksum, lro);
rx_done->cnt++;
rx_done->idx = rx_done->cnt & rx_done->mask;
@@ -2781,11 +2812,11 @@ mxge_clean_rx_done(struct mxge_slice_state *ss)
if (__predict_false(++limit > rx_done->mask / 2))
break;
}
-#ifdef INET
- while (!SLIST_EMPTY(&ss->lro_active)) {
- struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
- SLIST_REMOVE_HEAD(&ss->lro_active, next);
- mxge_lro_flush(ss, lro);
+#if defined(INET) || defined (INET6)
+ while (!SLIST_EMPTY(&ss->lc.lro_active)) {
+ struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active);
+ SLIST_REMOVE_HEAD(&ss->lc.lro_active, next);
+ tcp_lro_flush(&ss->lc, lro);
}
#endif
}
@@ -3153,15 +3184,11 @@ mxge_init(void *arg)
static void
mxge_free_slice_mbufs(struct mxge_slice_state *ss)
{
- struct lro_entry *lro_entry;
int i;
- while (!SLIST_EMPTY(&ss->lro_free)) {
- lro_entry = SLIST_FIRST(&ss->lro_free);
- SLIST_REMOVE_HEAD(&ss->lro_free, next);
- free(lro_entry, M_DEVBUF);
- }
-
+#if defined(INET) || defined(INET6)
+ tcp_lro_free(&ss->lc);
+#endif
for (i = 0; i <= ss->rx_big.mask; i++) {
if (ss->rx_big.info[i].m == NULL)
continue;
@@ -3545,26 +3572,17 @@ mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
mxge_softc_t *sc;
mxge_cmd_t cmd;
bus_dmamap_t map;
- struct lro_entry *lro_entry;
int err, i, slice;
sc = ss->sc;
slice = ss - sc->ss;
- SLIST_INIT(&ss->lro_free);
- SLIST_INIT(&ss->lro_active);
-
- for (i = 0; i < sc->lro_cnt; i++) {
- lro_entry = (struct lro_entry *)
- malloc(sizeof (*lro_entry), M_DEVBUF,
- M_NOWAIT | M_ZERO);
- if (lro_entry == NULL) {
- sc->lro_cnt = i;
- break;
- }
- SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
- }
+#if defined(INET) || defined(INET6)
+ (void)tcp_lro_init(&ss->lc);
+#endif
+ ss->lc.ifp = sc->ifp;
+
/* get the lanai pointers to the send and receive rings */
err = 0;
@@ -4219,10 +4237,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
} else if (mask & IFCAP_RXCSUM) {
if (IFCAP_RXCSUM & ifp->if_capenable) {
ifp->if_capenable &= ~IFCAP_RXCSUM;
- sc->csum_flag = 0;
} else {
ifp->if_capenable |= IFCAP_RXCSUM;
- sc->csum_flag = 1;
}
}
if (mask & IFCAP_TSO4) {
@@ -4249,16 +4265,12 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
ifp->if_hwassist |= (CSUM_TCP_IPV6
| CSUM_UDP_IPV6);
}
-#ifdef NOTYET
- } else if (mask & IFCAP_RXCSUM6) {
- if (IFCAP_RXCSUM6 & ifp->if_capenable) {
- ifp->if_capenable &= ~IFCAP_RXCSUM6;
- sc->csum_flag = 0;
+ } else if (mask & IFCAP_RXCSUM_IPV6) {
+ if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
+ ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
} else {
- ifp->if_capenable |= IFCAP_RXCSUM6;
- sc->csum_flag = 1;
+ ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
}
-#endif
}
if (mask & IFCAP_TSO6) {
if (IFCAP_TSO6 & ifp->if_capenable) {
@@ -4274,12 +4286,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
}
#endif /*IFCAP_TSO6 */
- if (mask & IFCAP_LRO) {
- if (IFCAP_LRO & ifp->if_capenable)
- err = mxge_change_lro_locked(sc, 0);
- else
- err = mxge_change_lro_locked(sc, mxge_lro_cnt);
- }
+ if (mask & IFCAP_LRO)
+ ifp->if_capenable ^= IFCAP_LRO;
if (mask & IFCAP_VLAN_HWTAGGING)
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
if (mask & IFCAP_VLAN_HWTSO)
@@ -4326,14 +4334,11 @@ mxge_fetch_tunables(mxge_softc_t *sc)
TUNABLE_INT_FETCH("hw.mxge.verbose",
&mxge_verbose);
TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
- TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
- if (sc->lro_cnt != 0)
- mxge_lro_cnt = sc->lro_cnt;
if (bootverbose)
mxge_verbose = 1;
@@ -4897,8 +4902,9 @@ mxge_attach(device_t dev)
if_initbaudrate(ifp, IF_Gbps(10));
ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
- IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6;
-#ifdef INET
+ IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
+ IFCAP_RXCSUM_IPV6;
+#if defined(INET) || defined(INET6)
ifp->if_capabilities |= IFCAP_LRO;
#endif
@@ -4929,7 +4935,6 @@ mxge_attach(device_t dev)
ifp->if_capenable = ifp->if_capabilities;
if (sc->lro_cnt == 0)
ifp->if_capenable &= ~IFCAP_LRO;
- sc->csum_flag = 1;
ifp->if_init = mxge_init;
ifp->if_softc = sc;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
OpenPOWER on IntegriCloud