summary refs log tree commit diff stats
path: root/sys/dev/mxge
diff options
context:
space:
mode:
author gallatin <gallatin@FreeBSD.org> 2013-02-21 21:28:33 +0000
committer gallatin <gallatin@FreeBSD.org> 2013-02-21 21:28:33 +0000
commit 966be4941492002f86456a9088733904f5559c59 (patch)
tree fcbf0037b80b68ce65939bc809992a1d8136674b /sys/dev/mxge
parent d9f661c57a95d2cc036dc1b269c8527f65836b4b (diff)
download FreeBSD-src-966be4941492002f86456a9088733904f5559c59.zip
FreeBSD-src-966be4941492002f86456a9088733904f5559c59.tar.gz
Improve mxge's receive performance for IPv6:

- Add support for IPv6 rx csum offload
- Finally switch mxge from using its own driver LRO to using tcp_lro

MFC after: 7 days
Sponsored by: Myricom Inc.
Diffstat (limited to 'sys/dev/mxge')
-rw-r--r-- sys/dev/mxge/if_mxge.c 289
-rw-r--r-- sys/dev/mxge/if_mxge_var.h 32
-rw-r--r-- sys/dev/mxge/mxge_lro.c 357
3 files changed, 148 insertions, 530 deletions
diff --git a/sys/dev/mxge/if_mxge.c b/sys/dev/mxge/if_mxge.c
index 68afbf9..fcc0825 100644
--- a/sys/dev/mxge/if_mxge.c
+++ b/sys/dev/mxge/if_mxge.c
@@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
+#include <netinet/tcp_lro.h>
#include <netinet6/ip6_var.h>
#include <machine/bus.h>
@@ -102,7 +103,6 @@ static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
-static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
@@ -1311,9 +1311,9 @@ mxge_reset(mxge_softc_t *sc, int interrupts_setup)
ss->tx.stall = 0;
ss->rx_big.cnt = 0;
ss->rx_small.cnt = 0;
- ss->lro_bad_csum = 0;
- ss->lro_queued = 0;
- ss->lro_flushed = 0;
+ ss->lc.lro_bad_csum = 0;
+ ss->lc.lro_queued = 0;
+ ss->lc.lro_flushed = 0;
if (ss->fw_stats != NULL) {
bzero(ss->fw_stats, sizeof *ss->fw_stats);
}
@@ -1414,50 +1414,6 @@ mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
}
static int
-mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
-{
- struct ifnet *ifp;
- int err = 0;
-
- ifp = sc->ifp;
- if (lro_cnt == 0)
- ifp->if_capenable &= ~IFCAP_LRO;
- else
- ifp->if_capenable |= IFCAP_LRO;
- sc->lro_cnt = lro_cnt;
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- mxge_close(sc, 0);
- err = mxge_open(sc);
- }
- return err;
-}
-
-static int
-mxge_change_lro(SYSCTL_HANDLER_ARGS)
-{
- mxge_softc_t *sc;
- unsigned int lro_cnt;
- int err;
-
- sc = arg1;
- lro_cnt = sc->lro_cnt;
- err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
- if (err != 0)
- return err;
-
- if (lro_cnt == sc->lro_cnt)
- return 0;
-
- if (lro_cnt > 128)
- return EINVAL;
-
- mtx_lock(&sc->driver_mtx);
- err = mxge_change_lro_locked(sc, lro_cnt);
- mtx_unlock(&sc->driver_mtx);
- return err;
-}
-
-static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
int err;
@@ -1653,14 +1609,6 @@ mxge_add_sysctls(mxge_softc_t *sc)
CTLFLAG_RW, &mxge_verbose,
0, "verbose printing");
- /* lro */
- SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
- "lro_cnt",
- CTLTYPE_INT|CTLFLAG_RW, sc,
- 0, mxge_change_lro,
- "I", "number of lro merge queues");
-
-
/* add counters exported for debugging from all slices */
sysctl_ctx_init(&sc->slice_sysctl_ctx);
sc->slice_sysctl_tree =
@@ -1686,11 +1634,15 @@ mxge_add_sysctls(mxge_softc_t *sc)
CTLFLAG_RD, &ss->rx_big.cnt,
0, "rx_small_cnt");
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
- "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
+ "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
0, "number of lro merge queues flushed");
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
- "lro_queued", CTLFLAG_RD, &ss->lro_queued,
+ "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
+ 0, "number of bad csums preventing LRO");
+
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
0, "number of frames appended to lro merge"
"queues");
@@ -2534,6 +2486,64 @@ done:
return err;
}
+#ifdef INET6
+
+static uint16_t
+mxge_csum_generic(uint16_t *raw, int len)
+{
+ uint32_t csum;
+ /* Add up the buffer at raw as 16-bit words read in memory order, consuming two bytes of len per step, then fold the 32-bit total to 16 bits. */
+ /* NOTE(review): an odd len still loads one whole final word; callers presumably pass an even byte count -- confirm. */
+ csum = 0;
+ while (len > 0) {
+ csum += *raw;
+ raw++;
+ len -= 2;
+ }
+ csum = (csum >> 16) + (csum & 0xffff); /* fold the carries above bit 15 back into the low 16 bits */
+ csum = (csum >> 16) + (csum & 0xffff); /* second fold absorbs a carry produced by the first */
+ return (uint16_t)csum;
+}
+
+static inline uint16_t
+mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
+{
+ uint32_t partial;
+ int nxt, cksum_offset;
+ struct ip6_hdr *ip6 = p; /* p is used as the start of the IPv6 header */
+ uint16_t c;
+ /* Start by assuming TCP/UDP directly follows the fixed IPv6 header. */
+ nxt = ip6->ip6_nxt;
+ cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
+ if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
+ cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
+ IPPROTO_IPV6, &nxt); /* NOTE(review): presumably walks the extension headers and yields the final header's offset in m plus its protocol in nxt; the returned offset is not checked for failure here -- confirm */
+ if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
+ return (1); /* neither TCP nor UDP: return non-zero so the caller's "== 0" test fails */
+ }
+ /* From here on cksum_offset is the byte offset in the frame at which the TCP/UDP header starts. */
+ /*
+ * IPv6 headers do not contain a checksum, and hence
+ * do not checksum to zero, so they don't "fall out"
+ * of the partial checksum calculation like IPv4
+ * headers do. We need to fix the partial checksum by
+ * subtracting the checksum of the IPv6 header.
+ */
+ /* The summed length is everything between the Ethernet header and the TCP/UDP header. */
+ partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
+ ETHER_HDR_LEN);
+ csum += ~partial; /* ones-complement subtraction: add the complement of the header sum */
+ csum += (csum < ~partial); /* the addition wrapped past 32 bits: add the end-around carry */
+ csum = (csum >> 16) + (csum & 0xFFFF); /* fold to 16 bits ... */
+ csum = (csum >> 16) + (csum & 0xFFFF); /* ... and absorb the carry from that fold */
+ c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
+ csum);
+ /* NOTE(review): in6_cksum_pseudo presumably adds the IPv6 pseudo-header for a payload of the given length to csum -- confirm. */
+// printf("%d %d %x %x %x %x %x\n", m->m_pkthdr.len, cksum_offset, c, csum, ocsum, partial, d);
+ c ^= 0xffff; /* invert the result; the caller compares the returned value against 0 */
+ return (c);
+}
+#endif /* INET6 */
/*
* Myri10GE hardware checksums are not valid if the sender
* padded the frame with non-zero padding. This is because
@@ -2547,26 +2557,39 @@ static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
struct ether_header *eh;
+#ifdef INET
struct ip *ip;
- uint16_t c;
+#endif
+ int cap = m->m_pkthdr.rcvif->if_capenable;
+ uint16_t c, etype;
- eh = mtod(m, struct ether_header *);
- /* only deal with IPv4 TCP & UDP for now */
- if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
- return 1;
- ip = (struct ip *)(eh + 1);
- if (__predict_false(ip->ip_p != IPPROTO_TCP &&
- ip->ip_p != IPPROTO_UDP))
- return 1;
+ eh = mtod(m, struct ether_header *);
+ etype = ntohs(eh->ether_type);
+ switch (etype) {
#ifdef INET
- c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htonl(ntohs(csum) + ntohs(ip->ip_len) +
- - (ip->ip_hl << 2) + ip->ip_p));
-#else
- c = 1;
+ case ETHERTYPE_IP:
+ if ((cap & IFCAP_RXCSUM) == 0)
+ return (1);
+ ip = (struct ip *)(eh + 1);
+ if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
+ return (1);
+ c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htonl(ntohs(csum) + ntohs(ip->ip_len) -
+ (ip->ip_hl << 2) + ip->ip_p));
+ c ^= 0xffff;
+ break;
#endif
- c ^= 0xffff;
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ if ((cap & IFCAP_RXCSUM_IPV6) == 0)
+ return (1);
+ c = mxge_rx_csum6((eh + 1), m, csum);
+ break;
+#endif
+ default:
+ c = 1;
+ }
return (c);
}
@@ -2628,7 +2651,8 @@ mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
static inline void
-mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
+mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
+ uint32_t csum, int lro)
{
mxge_softc_t *sc;
struct ifnet *ifp;
@@ -2637,7 +2661,6 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
mxge_rx_ring_t *rx;
bus_dmamap_t old_map;
int idx;
- uint16_t tcpudp_csum;
sc = ss->sc;
ifp = sc->ifp;
@@ -2674,14 +2697,18 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
mxge_vlan_tag_remove(m, &csum);
}
/* if the checksum is valid, mark it in the mbuf header */
- if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
- if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
- return;
- /* otherwise, it was a UDP frame, or a TCP frame which
- we could not do LRO on. Tell the stack that the
- checksum is good */
+
+ if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
+ (0 == mxge_rx_csum(m, csum))) {
+ /* Tell the stack that the checksum is good */
m->m_pkthdr.csum_data = 0xffff;
- m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
+ m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
+ CSUM_DATA_VALID;
+
+#if defined(INET) || defined (INET6)
+ if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
+ return;
+#endif
}
/* flowid only valid if RSS hashing is enabled */
if (sc->num_slices > 1) {
@@ -2693,7 +2720,8 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
}
static inline void
-mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
+mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
+ uint32_t csum, int lro)
{
mxge_softc_t *sc;
struct ifnet *ifp;
@@ -2702,7 +2730,6 @@ mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
mxge_rx_ring_t *rx;
bus_dmamap_t old_map;
int idx;
- uint16_t tcpudp_csum;
sc = ss->sc;
ifp = sc->ifp;
@@ -2739,14 +2766,17 @@ mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
mxge_vlan_tag_remove(m, &csum);
}
/* if the checksum is valid, mark it in the mbuf header */
- if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
- if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
- return;
- /* otherwise, it was a UDP frame, or a TCP frame which
- we could not do LRO on. Tell the stack that the
- checksum is good */
+ if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
+ (0 == mxge_rx_csum(m, csum))) {
+ /* Tell the stack that the checksum is good */
m->m_pkthdr.csum_data = 0xffff;
- m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
+ m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
+ CSUM_DATA_VALID;
+
+#if defined(INET) || defined (INET6)
+ if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
+ return;
+#endif
}
/* flowid only valid if RSS hashing is enabled */
if (sc->num_slices > 1) {
@@ -2764,16 +2794,17 @@ mxge_clean_rx_done(struct mxge_slice_state *ss)
int limit = 0;
uint16_t length;
uint16_t checksum;
+ int lro;
-
+ lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
while (rx_done->entry[rx_done->idx].length != 0) {
length = ntohs(rx_done->entry[rx_done->idx].length);
rx_done->entry[rx_done->idx].length = 0;
checksum = rx_done->entry[rx_done->idx].checksum;
if (length <= (MHLEN - MXGEFW_PAD))
- mxge_rx_done_small(ss, length, checksum);
+ mxge_rx_done_small(ss, length, checksum, lro);
else
- mxge_rx_done_big(ss, length, checksum);
+ mxge_rx_done_big(ss, length, checksum, lro);
rx_done->cnt++;
rx_done->idx = rx_done->cnt & rx_done->mask;
@@ -2781,11 +2812,11 @@ mxge_clean_rx_done(struct mxge_slice_state *ss)
if (__predict_false(++limit > rx_done->mask / 2))
break;
}
-#ifdef INET
- while (!SLIST_EMPTY(&ss->lro_active)) {
- struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
- SLIST_REMOVE_HEAD(&ss->lro_active, next);
- mxge_lro_flush(ss, lro);
+#if defined(INET) || defined (INET6)
+ while (!SLIST_EMPTY(&ss->lc.lro_active)) {
+ struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active);
+ SLIST_REMOVE_HEAD(&ss->lc.lro_active, next);
+ tcp_lro_flush(&ss->lc, lro);
}
#endif
}
@@ -3153,15 +3184,11 @@ mxge_init(void *arg)
static void
mxge_free_slice_mbufs(struct mxge_slice_state *ss)
{
- struct lro_entry *lro_entry;
int i;
- while (!SLIST_EMPTY(&ss->lro_free)) {
- lro_entry = SLIST_FIRST(&ss->lro_free);
- SLIST_REMOVE_HEAD(&ss->lro_free, next);
- free(lro_entry, M_DEVBUF);
- }
-
+#if defined(INET) || defined(INET6)
+ tcp_lro_free(&ss->lc);
+#endif
for (i = 0; i <= ss->rx_big.mask; i++) {
if (ss->rx_big.info[i].m == NULL)
continue;
@@ -3545,26 +3572,17 @@ mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
mxge_softc_t *sc;
mxge_cmd_t cmd;
bus_dmamap_t map;
- struct lro_entry *lro_entry;
int err, i, slice;
sc = ss->sc;
slice = ss - sc->ss;
- SLIST_INIT(&ss->lro_free);
- SLIST_INIT(&ss->lro_active);
-
- for (i = 0; i < sc->lro_cnt; i++) {
- lro_entry = (struct lro_entry *)
- malloc(sizeof (*lro_entry), M_DEVBUF,
- M_NOWAIT | M_ZERO);
- if (lro_entry == NULL) {
- sc->lro_cnt = i;
- break;
- }
- SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
- }
+#if defined(INET) || defined(INET6)
+ (void)tcp_lro_init(&ss->lc);
+#endif
+ ss->lc.ifp = sc->ifp;
+
/* get the lanai pointers to the send and receive rings */
err = 0;
@@ -4219,10 +4237,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
} else if (mask & IFCAP_RXCSUM) {
if (IFCAP_RXCSUM & ifp->if_capenable) {
ifp->if_capenable &= ~IFCAP_RXCSUM;
- sc->csum_flag = 0;
} else {
ifp->if_capenable |= IFCAP_RXCSUM;
- sc->csum_flag = 1;
}
}
if (mask & IFCAP_TSO4) {
@@ -4249,16 +4265,12 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
ifp->if_hwassist |= (CSUM_TCP_IPV6
| CSUM_UDP_IPV6);
}
-#ifdef NOTYET
- } else if (mask & IFCAP_RXCSUM6) {
- if (IFCAP_RXCSUM6 & ifp->if_capenable) {
- ifp->if_capenable &= ~IFCAP_RXCSUM6;
- sc->csum_flag = 0;
+ } else if (mask & IFCAP_RXCSUM_IPV6) {
+ if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
+ ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
} else {
- ifp->if_capenable |= IFCAP_RXCSUM6;
- sc->csum_flag = 1;
+ ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
}
-#endif
}
if (mask & IFCAP_TSO6) {
if (IFCAP_TSO6 & ifp->if_capenable) {
@@ -4274,12 +4286,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
}
#endif /*IFCAP_TSO6 */
- if (mask & IFCAP_LRO) {
- if (IFCAP_LRO & ifp->if_capenable)
- err = mxge_change_lro_locked(sc, 0);
- else
- err = mxge_change_lro_locked(sc, mxge_lro_cnt);
- }
+ if (mask & IFCAP_LRO)
+ ifp->if_capenable ^= IFCAP_LRO;
if (mask & IFCAP_VLAN_HWTAGGING)
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
if (mask & IFCAP_VLAN_HWTSO)
@@ -4326,14 +4334,11 @@ mxge_fetch_tunables(mxge_softc_t *sc)
TUNABLE_INT_FETCH("hw.mxge.verbose",
&mxge_verbose);
TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
- TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
- if (sc->lro_cnt != 0)
- mxge_lro_cnt = sc->lro_cnt;
if (bootverbose)
mxge_verbose = 1;
@@ -4897,8 +4902,9 @@ mxge_attach(device_t dev)
if_initbaudrate(ifp, IF_Gbps(10));
ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
- IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6;
-#ifdef INET
+ IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
+ IFCAP_RXCSUM_IPV6;
+#if defined(INET) || defined(INET6)
ifp->if_capabilities |= IFCAP_LRO;
#endif
@@ -4929,7 +4935,6 @@ mxge_attach(device_t dev)
ifp->if_capenable = ifp->if_capabilities;
if (sc->lro_cnt == 0)
ifp->if_capenable &= ~IFCAP_LRO;
- sc->csum_flag = 1;
ifp->if_init = mxge_init;
ifp->if_softc = sc;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
diff --git a/sys/dev/mxge/if_mxge_var.h b/sys/dev/mxge/if_mxge_var.h
index 9393577..7ca1f39 100644
--- a/sys/dev/mxge/if_mxge_var.h
+++ b/sys/dev/mxge/if_mxge_var.h
@@ -194,31 +194,6 @@ typedef struct
char mtx_name[16];
} mxge_tx_ring_t;
-struct lro_entry;
-struct lro_entry
-{
- SLIST_ENTRY(lro_entry) next;
- struct mbuf *m_head;
- struct mbuf *m_tail;
- int timestamp;
- struct ip *ip;
- uint32_t tsval;
- uint32_t tsecr;
- uint32_t source_ip;
- uint32_t dest_ip;
- uint32_t next_seq;
- uint32_t ack_seq;
- uint32_t len;
- uint32_t data_csum;
- uint16_t window;
- uint16_t source_port;
- uint16_t dest_port;
- uint16_t append_cnt;
- uint16_t mss;
-
-};
-SLIST_HEAD(lro_head, lro_entry);
-
struct mxge_softc;
typedef struct mxge_softc mxge_softc_t;
@@ -236,11 +211,7 @@ struct mxge_slice_state {
u_long omcasts;
u_long oerrors;
int if_drv_flags;
- struct lro_head lro_active;
- struct lro_head lro_free;
- int lro_queued;
- int lro_flushed;
- int lro_bad_csum;
+ struct lro_ctrl lc;
mxge_dma_t fw_stats_dma;
struct sysctl_oid *sysctl_tree;
struct sysctl_ctx_list sysctl_ctx;
@@ -250,7 +221,6 @@ struct mxge_slice_state {
struct mxge_softc {
struct ifnet* ifp;
struct mxge_slice_state *ss;
- int csum_flag; /* rx_csums? */
int tx_boundary; /* boundary transmits cannot cross*/
int lro_cnt;
bus_dma_tag_t parent_dmat;
diff --git a/sys/dev/mxge/mxge_lro.c b/sys/dev/mxge/mxge_lro.c
deleted file mode 100644
index b313059..0000000
--- a/sys/dev/mxge/mxge_lro.c
+++ /dev/null
@@ -1,357 +0,0 @@
-/******************************************************************************
-
-Copyright (c) 2007-2008, Myricom Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Myricom Inc, nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/endian.h>
-#include <sys/mbuf.h>
-#include <sys/kernel.h>
-#include <sys/socket.h>
-#include <sys/sysctl.h>
-#include <sys/bus.h>
-
-#include <net/if.h>
-#include <net/ethernet.h>
-#include <net/if_media.h>
-
-#include <netinet/in_systm.h>
-#include <netinet/in.h>
-#include <netinet/ip.h>
-#include <netinet/tcp.h>
-
-#include <machine/bus.h>
-#include <machine/in_cksum.h>
-
-#include <dev/mxge/mxge_mcp.h>
-#include <dev/mxge/if_mxge_var.h>
-
-#include "opt_inet.h"
-
-#ifdef INET
-
-/* Assume len is a multiple of 4 */
-static uint16_t
-mxge_csum_generic(uint16_t *raw, int len)
-{
- uint32_t csum;
- csum = 0;
- while (len > 0) {
- csum += *raw;
- raw++;
- csum += *raw;
- raw++;
- len -= 4;
- }
- csum = (csum >> 16) + (csum & 0xffff);
- csum = (csum >> 16) + (csum & 0xffff);
- return (uint16_t)csum;
-}
-
-
-void
-mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
-{
- mxge_softc_t *mgp = ss->sc;
- struct ifnet *ifp;
- struct ip *ip;
- struct tcphdr *tcp;
- uint32_t *ts_ptr;
- uint32_t tcplen, tcp_csum;
-
- if (lro->append_cnt) {
- /* incorporate the new len into the ip header and
- * re-calculate the checksum */
- ip = lro->ip;
- ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
- ip->ip_sum = 0;
- ip->ip_sum = 0xffff ^
- mxge_csum_generic((uint16_t*)ip,
- sizeof (*ip));
-
- lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
- CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
- lro->m_head->m_pkthdr.csum_data = 0xffff;
- lro->m_head->m_pkthdr.len = lro->len;
-
- /* incorporate the latest ack into the tcp header */
- tcp = (struct tcphdr *) (ip + 1);
- tcp->th_ack = lro->ack_seq;
- tcp->th_win = lro->window;
- /* incorporate latest timestamp into the tcp header */
- if (lro->timestamp) {
- ts_ptr = (uint32_t *)(tcp + 1);
- ts_ptr[1] = htonl(lro->tsval);
- ts_ptr[2] = lro->tsecr;
- }
- /*
- * update checksum in tcp header by re-calculating the
- * tcp pseudoheader checksum, and adding it to the checksum
- * of the tcp payload data
- */
- tcp->th_sum = 0;
- tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
- tcp_csum = lro->data_csum;
- tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(tcplen + IPPROTO_TCP));
- tcp_csum += mxge_csum_generic((uint16_t*)tcp,
- tcp->th_off << 2);
- tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
- tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
-#if 0
- IOLog("pseudo = 0x%x, generic = 0x%x, sum = %x\n",
- in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(tcplen + IPPROTO_TCP)),
- mxge_csum_generic((uint16_t*)tcp,
- tcp->th_off << 2),
- htons(0xffff ^ tcp_csum));
-#endif
- tcp->th_sum = 0xffff ^ tcp_csum;
- }
- ifp = mgp->ifp;
- (*ifp->if_input)(mgp->ifp, lro->m_head);
- ss->lro_queued += lro->append_cnt + 1;
- ss->lro_flushed++;
- lro->m_head = NULL;
- lro->timestamp = 0;
- lro->append_cnt = 0;
- SLIST_INSERT_HEAD(&ss->lro_free, lro, next);
-}
-
-int
-mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
-{
- struct ether_header *eh;
- struct ip *ip;
- struct tcphdr *tcp;
- uint32_t *ts_ptr;
- struct mbuf *m_nxt, *m_tail;
- struct lro_entry *lro;
- int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
- int opt_bytes, trim;
- uint32_t seq, tmp_csum, device_mtu;
-
- eh = mtod(m_head, struct ether_header *);
- if (eh->ether_type != htons(ETHERTYPE_IP))
- return 1;
- ip = (struct ip *) (eh + 1);
- if (ip->ip_p != IPPROTO_TCP)
- return 1;
-
- /* ensure there are no options */
- if ((ip->ip_hl << 2) != sizeof (*ip))
- return -1;
-
- /* .. and the packet is not fragmented */
- if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
- return -1;
-
- /* verify that the IP header checksum is correct */
- tmp_csum = mxge_csum_generic((uint16_t *)ip, sizeof (*ip));
- if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
- ss->lro_bad_csum++;
- return -1;
- }
-
- /* find the TCP header */
- tcp = (struct tcphdr *) (ip + 1);
-
- /* ensure no bits set besides ack or psh */
- if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
- return -1;
-
- /* check for timestamps. Since the only option we handle are
- timestamps, we only have to handle the simple case of
- aligned timestamps */
-
- opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
- tcp_hdr_len = sizeof (*tcp) + opt_bytes;
- ts_ptr = (uint32_t *)(tcp + 1);
- if (opt_bytes != 0) {
- if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
- (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
- return -1;
- }
-
- ip_len = ntohs(ip->ip_len);
- tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
-
-
- /*
- * If frame is padded beyond the end of the IP packet,
- * then we must trim the extra bytes off the end.
- */
- tot_len = m_head->m_pkthdr.len;
- trim = tot_len - (ip_len + ETHER_HDR_LEN);
- if (trim != 0) {
- if (trim < 0) {
- /* truncated packet */
- return -1;
- }
- m_adj(m_head, -trim);
- tot_len = m_head->m_pkthdr.len;
- }
-
- m_nxt = m_head;
- m_tail = NULL; /* -Wuninitialized */
- while (m_nxt != NULL) {
- m_tail = m_nxt;
- m_nxt = m_tail->m_next;
- }
-
- hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
- seq = ntohl(tcp->th_seq);
-
- SLIST_FOREACH(lro, &ss->lro_active, next) {
- if (lro->source_port == tcp->th_sport &&
- lro->dest_port == tcp->th_dport &&
- lro->source_ip == ip->ip_src.s_addr &&
- lro->dest_ip == ip->ip_dst.s_addr) {
- /* Try to append it */
-
- if (__predict_false(seq != lro->next_seq ||
- (tcp_data_len == 0 &&
- lro->ack_seq == tcp->th_ack))) {
- /* out of order packet or dup ack */
- SLIST_REMOVE(&ss->lro_active, lro,
- lro_entry, next);
- mxge_lro_flush(ss, lro);
- return -1;
- }
-
- if (opt_bytes) {
- uint32_t tsval = ntohl(*(ts_ptr + 1));
- /* make sure timestamp values are increasing */
- if (__predict_false(lro->tsval > tsval ||
- *(ts_ptr + 2) == 0)) {
- return -1;
- }
- lro->tsval = tsval;
- lro->tsecr = *(ts_ptr + 2);
- }
-
- lro->next_seq += tcp_data_len;
- lro->ack_seq = tcp->th_ack;
- lro->window = tcp->th_win;
- lro->append_cnt++;
- if (tcp_data_len == 0) {
- m_freem(m_head);
- return 0;
- }
- /* subtract off the checksum of the tcp header
- * from the hardware checksum, and add it to the
- * stored tcp data checksum. Byteswap the checksum
- * if the total length so far is odd
- */
- tmp_csum = mxge_csum_generic((uint16_t*)tcp,
- tcp_hdr_len);
- csum = csum + (tmp_csum ^ 0xffff);
- csum = (csum & 0xffff) + (csum >> 16);
- csum = (csum & 0xffff) + (csum >> 16);
- if (lro->len & 0x1) {
- /* Odd number of bytes so far, flip bytes */
- csum = ((csum << 8) | (csum >> 8)) & 0xffff;
- }
- csum = csum + lro->data_csum;
- csum = (csum & 0xffff) + (csum >> 16);
- csum = (csum & 0xffff) + (csum >> 16);
- lro->data_csum = csum;
-
- lro->len += tcp_data_len;
-
- /* adjust mbuf so that m->m_data points to
- the first byte of the payload */
- m_adj(m_head, hlen);
- /* append mbuf chain */
- lro->m_tail->m_next = m_head;
- /* advance the last pointer */
- lro->m_tail = m_tail;
- /* flush packet if required */
- device_mtu = ss->sc->ifp->if_mtu;
- if (lro->len > (65535 - device_mtu)) {
- SLIST_REMOVE(&ss->lro_active, lro,
- lro_entry, next);
- mxge_lro_flush(ss, lro);
- }
- return 0;
- }
- }
-
- if (SLIST_EMPTY(&ss->lro_free))
- return -1;
-
- /* start a new chain */
- lro = SLIST_FIRST(&ss->lro_free);
- SLIST_REMOVE_HEAD(&ss->lro_free, next);
- SLIST_INSERT_HEAD(&ss->lro_active, lro, next);
- lro->source_port = tcp->th_sport;
- lro->dest_port = tcp->th_dport;
- lro->source_ip = ip->ip_src.s_addr;
- lro->dest_ip = ip->ip_dst.s_addr;
- lro->next_seq = seq + tcp_data_len;
- lro->mss = tcp_data_len;
- lro->ack_seq = tcp->th_ack;
- lro->window = tcp->th_win;
-
- /* save the checksum of just the TCP payload by
- * subtracting off the checksum of the TCP header from
- * the entire hardware checksum
- * Since IP header checksum is correct, checksum over
- * the IP header is -0. Substracting -0 is unnecessary.
- */
- tmp_csum = mxge_csum_generic((uint16_t*)tcp, tcp_hdr_len);
- csum = csum + (tmp_csum ^ 0xffff);
- csum = (csum & 0xffff) + (csum >> 16);
- csum = (csum & 0xffff) + (csum >> 16);
- lro->data_csum = csum;
-
- lro->ip = ip;
- /* record timestamp if it is present */
- if (opt_bytes) {
- lro->timestamp = 1;
- lro->tsval = ntohl(*(ts_ptr + 1));
- lro->tsecr = *(ts_ptr + 2);
- }
- lro->len = tot_len;
- lro->m_head = m_head;
- lro->m_tail = m_tail;
- return 0;
-}
-
-#endif /* INET */
-/*
- This file uses Myri10GE driver indentation.
-
- Local Variables:
- c-file-style:"linux"
- tab-width:8
- End:
-*/
OpenPOWER on IntegriCloud