diff options
author | gallatin <gallatin@FreeBSD.org> | 2013-02-21 21:28:33 +0000 |
---|---|---|
committer | gallatin <gallatin@FreeBSD.org> | 2013-02-21 21:28:33 +0000 |
commit | 966be4941492002f86456a9088733904f5559c59 (patch) | |
tree | fcbf0037b80b68ce65939bc809992a1d8136674b /sys/dev | |
parent | d9f661c57a95d2cc036dc1b269c8527f65836b4b (diff) | |
download | FreeBSD-src-966be4941492002f86456a9088733904f5559c59.zip FreeBSD-src-966be4941492002f86456a9088733904f5559c59.tar.gz |
Improve mxge's receive performance for IPv6:
- Add support for IPv6 rx csum offload
- Finally switch mxge from using its own driver lro, to
using tcp_lro
MFC after: 7 days
Sponsored by: Myricom Inc.
Diffstat (limited to 'sys/dev')
-rw-r--r-- | sys/dev/mxge/if_mxge.c | 289 | ||||
-rw-r--r-- | sys/dev/mxge/if_mxge_var.h | 32 | ||||
-rw-r--r-- | sys/dev/mxge/mxge_lro.c | 357 |
3 files changed, 148 insertions, 530 deletions
diff --git a/sys/dev/mxge/if_mxge.c b/sys/dev/mxge/if_mxge.c index 68afbf9..fcc0825 100644 --- a/sys/dev/mxge/if_mxge.c +++ b/sys/dev/mxge/if_mxge.c @@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip.h> #include <netinet/ip6.h> #include <netinet/tcp.h> +#include <netinet/tcp_lro.h> #include <netinet6/ip6_var.h> #include <machine/bus.h> @@ -102,7 +103,6 @@ static int mxge_intr_coal_delay = 30; static int mxge_deassert_wait = 1; static int mxge_flow_control = 1; static int mxge_verbose = 0; -static int mxge_lro_cnt = 8; static int mxge_ticks; static int mxge_max_slices = 1; static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; @@ -1311,9 +1311,9 @@ mxge_reset(mxge_softc_t *sc, int interrupts_setup) ss->tx.stall = 0; ss->rx_big.cnt = 0; ss->rx_small.cnt = 0; - ss->lro_bad_csum = 0; - ss->lro_queued = 0; - ss->lro_flushed = 0; + ss->lc.lro_bad_csum = 0; + ss->lc.lro_queued = 0; + ss->lc.lro_flushed = 0; if (ss->fw_stats != NULL) { bzero(ss->fw_stats, sizeof *ss->fw_stats); } @@ -1414,50 +1414,6 @@ mxge_change_flow_control(SYSCTL_HANDLER_ARGS) } static int -mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) -{ - struct ifnet *ifp; - int err = 0; - - ifp = sc->ifp; - if (lro_cnt == 0) - ifp->if_capenable &= ~IFCAP_LRO; - else - ifp->if_capenable |= IFCAP_LRO; - sc->lro_cnt = lro_cnt; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - mxge_close(sc, 0); - err = mxge_open(sc); - } - return err; -} - -static int -mxge_change_lro(SYSCTL_HANDLER_ARGS) -{ - mxge_softc_t *sc; - unsigned int lro_cnt; - int err; - - sc = arg1; - lro_cnt = sc->lro_cnt; - err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); - if (err != 0) - return err; - - if (lro_cnt == sc->lro_cnt) - return 0; - - if (lro_cnt > 128) - return EINVAL; - - mtx_lock(&sc->driver_mtx); - err = mxge_change_lro_locked(sc, lro_cnt); - mtx_unlock(&sc->driver_mtx); - return err; -} - -static int mxge_handle_be32(SYSCTL_HANDLER_ARGS) { int err; @@ -1653,14 +1609,6 @@ mxge_add_sysctls(mxge_softc_t *sc) CTLFLAG_RW, &mxge_verbose, 0, "verbose printing"); - /* lro */ - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, - "lro_cnt", - CTLTYPE_INT|CTLFLAG_RW, sc, - 0, mxge_change_lro, - "I", "number of lro merge queues"); - - /* add counters exported for debugging from all slices */ sysctl_ctx_init(&sc->slice_sysctl_ctx); sc->slice_sysctl_tree = @@ -1686,11 +1634,15 @@ mxge_add_sysctls(mxge_softc_t *sc) CTLFLAG_RD, &ss->rx_big.cnt, 0, "rx_small_cnt"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, + "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed, 0, "number of lro merge queues flushed"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "lro_queued", CTLFLAG_RD, &ss->lro_queued, + "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum, + 0, "number of bad csums preventing LRO"); + + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued, 0, "number of frames appended to lro merge" "queues"); @@ -2534,6 +2486,64 @@ done: return err; } +#ifdef INET6 + +static uint16_t +mxge_csum_generic(uint16_t *raw, int len) +{ + uint32_t csum; + + + csum = 0; + while (len > 0) { + csum += *raw; + raw++; + len -= 2; + } + csum = (csum >> 16) + (csum & 0xffff); + csum = (csum >> 16) + (csum & 0xffff); + return (uint16_t)csum; +} + +static inline uint16_t +mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum) +{ + uint32_t partial; + int nxt, cksum_offset; + struct ip6_hdr *ip6 = p; + uint16_t c; + + nxt = ip6->ip6_nxt; + cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN; + if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { + cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN, + IPPROTO_IPV6, &nxt); + if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) + return (1); + } + + /* + * IPv6 headers do not contain a checksum, and hence + * do not checksum to zero, so they don't "fall out" + * of the partial checksum calculation like IPv4 + * headers do. We need to fix the partial checksum by + * subtracting the checksum of the IPv6 header. + */ + + partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - + ETHER_HDR_LEN); + csum += ~partial; + csum += (csum < ~partial); + csum = (csum >> 16) + (csum & 0xFFFF); + csum = (csum >> 16) + (csum & 0xFFFF); + c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, + csum); + +// printf("%d %d %x %x %x %x %x\n", m->m_pkthdr.len, cksum_offset, c, csum, ocsum, partial, d); + c ^= 0xffff; + return (c); +} +#endif /* INET6 */ /* * Myri10GE hardware checksums are not valid if the sender * padded the frame with non-zero padding. This is because @@ -2547,26 +2557,39 @@ static inline uint16_t mxge_rx_csum(struct mbuf *m, int csum) { struct ether_header *eh; +#ifdef INET struct ip *ip; - uint16_t c; +#endif + int cap = m->m_pkthdr.rcvif->if_capenable; + uint16_t c, etype; - eh = mtod(m, struct ether_header *); - /* only deal with IPv4 TCP & UDP for now */ - if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) - return 1; - ip = (struct ip *)(eh + 1); - if (__predict_false(ip->ip_p != IPPROTO_TCP && - ip->ip_p != IPPROTO_UDP)) - return 1; + eh = mtod(m, struct ether_header *); + etype = ntohs(eh->ether_type); + switch (etype) { #ifdef INET - c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htonl(ntohs(csum) + ntohs(ip->ip_len) + - - (ip->ip_hl << 2) + ip->ip_p)); -#else - c = 1; + case ETHERTYPE_IP: + if ((cap & IFCAP_RXCSUM) == 0) + return (1); + ip = (struct ip *)(eh + 1); + if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) + return (1); + c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htonl(ntohs(csum) + ntohs(ip->ip_len) - + (ip->ip_hl << 2) + ip->ip_p)); + c ^= 0xffff; + break; #endif - c ^= 0xffff; +#ifdef INET6 + case ETHERTYPE_IPV6: + if ((cap & IFCAP_RXCSUM_IPV6) == 0) + return (1); + c = mxge_rx_csum6((eh + 1), m, csum); + break; +#endif + default: + c = 1; + } return (c); } @@ -2628,7 +2651,8 @@ mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) static inline void -mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) +mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, + uint32_t csum, int lro) { mxge_softc_t *sc; struct ifnet *ifp; @@ -2637,7 +2661,6 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) mxge_rx_ring_t *rx; bus_dmamap_t old_map; int idx; - uint16_t tcpudp_csum; sc = ss->sc; ifp = sc->ifp; @@ -2674,14 +2697,18 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) mxge_vlan_tag_remove(m, &csum); } /* if the checksum is valid, mark it in the mbuf header */ - if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { - if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) - return; - /* otherwise, it was a UDP frame, or a TCP frame which - we could not do LRO on. Tell the stack that the - checksum is good */ + + if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && + (0 == mxge_rx_csum(m, csum))) { + /* Tell the stack that the checksum is good */ m->m_pkthdr.csum_data = 0xffff; - m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; + m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | + CSUM_DATA_VALID; + +#if defined(INET) || defined (INET6) + if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) + return; +#endif } /* flowid only valid if RSS hashing is enabled */ if (sc->num_slices > 1) { @@ -2693,7 +2720,8 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) } static inline void -mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) +mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, + uint32_t csum, int lro) { mxge_softc_t *sc; struct ifnet *ifp; @@ -2702,7 +2730,6 @@ mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) mxge_rx_ring_t *rx; bus_dmamap_t old_map; int idx; - uint16_t tcpudp_csum; sc = ss->sc; ifp = sc->ifp; @@ -2739,14 +2766,17 @@ mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) mxge_vlan_tag_remove(m, &csum); } /* if the checksum is valid, mark it in the mbuf header */ - if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { - if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) - return; - /* otherwise, it was a UDP frame, or a TCP frame which - we could not do LRO on. Tell the stack that the - checksum is good */ + if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && + (0 == mxge_rx_csum(m, csum))) { + /* Tell the stack that the checksum is good */ m->m_pkthdr.csum_data = 0xffff; - m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; + m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | + CSUM_DATA_VALID; + +#if defined(INET) || defined (INET6) + if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) + return; +#endif } /* flowid only valid if RSS hashing is enabled */ if (sc->num_slices > 1) { @@ -2764,16 +2794,17 @@ mxge_clean_rx_done(struct mxge_slice_state *ss) int limit = 0; uint16_t length; uint16_t checksum; + int lro; - + lro = ss->sc->ifp->if_capenable & IFCAP_LRO; while (rx_done->entry[rx_done->idx].length != 0) { length = ntohs(rx_done->entry[rx_done->idx].length); rx_done->entry[rx_done->idx].length = 0; checksum = rx_done->entry[rx_done->idx].checksum; if (length <= (MHLEN - MXGEFW_PAD)) - mxge_rx_done_small(ss, length, checksum); + mxge_rx_done_small(ss, length, checksum, lro); else - mxge_rx_done_big(ss, length, checksum); + mxge_rx_done_big(ss, length, checksum, lro); rx_done->cnt++; rx_done->idx = rx_done->cnt & rx_done->mask; @@ -2781,11 +2812,11 @@ mxge_clean_rx_done(struct mxge_slice_state *ss) if (__predict_false(++limit > rx_done->mask / 2)) break; } -#ifdef INET - while (!SLIST_EMPTY(&ss->lro_active)) { - struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); - SLIST_REMOVE_HEAD(&ss->lro_active, next); - mxge_lro_flush(ss, lro); +#if defined(INET) || defined (INET6) + while (!SLIST_EMPTY(&ss->lc.lro_active)) { + struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active); + SLIST_REMOVE_HEAD(&ss->lc.lro_active, next); + tcp_lro_flush(&ss->lc, lro); } #endif } @@ -3153,15 +3184,11 @@ mxge_init(void *arg) static void mxge_free_slice_mbufs(struct mxge_slice_state *ss) { - struct lro_entry *lro_entry; int i; - while (!SLIST_EMPTY(&ss->lro_free)) { - lro_entry = SLIST_FIRST(&ss->lro_free); - SLIST_REMOVE_HEAD(&ss->lro_free, next); - free(lro_entry, M_DEVBUF); - } - +#if defined(INET) || defined(INET6) + tcp_lro_free(&ss->lc); +#endif for (i = 0; i <= ss->rx_big.mask; i++) { if (ss->rx_big.info[i].m == NULL) continue; @@ -3545,26 +3572,17 @@ mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) mxge_softc_t *sc; mxge_cmd_t cmd; bus_dmamap_t map; - struct lro_entry *lro_entry; int err, i, slice; sc = ss->sc; slice = ss - sc->ss; - SLIST_INIT(&ss->lro_free); - SLIST_INIT(&ss->lro_active); - - for (i = 0; i < sc->lro_cnt; i++) { - lro_entry = (struct lro_entry *) - malloc(sizeof (*lro_entry), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (lro_entry == NULL) { - sc->lro_cnt = i; - break; - } - SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); - } +#if defined(INET) || defined(INET6) + (void)tcp_lro_init(&ss->lc); +#endif + ss->lc.ifp = sc->ifp; + /* get the lanai pointers to the send and receive rings */ err = 0; @@ -4219,10 +4237,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) } else if (mask & IFCAP_RXCSUM) { if (IFCAP_RXCSUM & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_RXCSUM; - sc->csum_flag = 0; } else { ifp->if_capenable |= IFCAP_RXCSUM; - sc->csum_flag = 1; } } if (mask & IFCAP_TSO4) { @@ -4249,16 +4265,12 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6); } -#ifdef NOTYET - } else if (mask & IFCAP_RXCSUM6) { - if (IFCAP_RXCSUM6 & ifp->if_capenable) { - ifp->if_capenable &= ~IFCAP_RXCSUM6; - sc->csum_flag = 0; + } else if (mask & IFCAP_RXCSUM_IPV6) { + if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) { + ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6; } else { - ifp->if_capenable |= IFCAP_RXCSUM6; - sc->csum_flag = 1; + ifp->if_capenable |= IFCAP_RXCSUM_IPV6; } -#endif } if (mask & IFCAP_TSO6) { if (IFCAP_TSO6 & ifp->if_capenable) { @@ -4274,12 +4286,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) } #endif /*IFCAP_TSO6 */ - if (mask & IFCAP_LRO) { - if (IFCAP_LRO & ifp->if_capenable) - err = mxge_change_lro_locked(sc, 0); - else - err = mxge_change_lro_locked(sc, mxge_lro_cnt); - } + if (mask & IFCAP_LRO) + ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWTSO) @@ -4326,14 +4334,11 @@ mxge_fetch_tunables(mxge_softc_t *sc) TUNABLE_INT_FETCH("hw.mxge.verbose", &mxge_verbose); TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); - TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); - if (sc->lro_cnt != 0) - mxge_lro_cnt = sc->lro_cnt; if (bootverbose) mxge_verbose = 1; @@ -4897,8 +4902,9 @@ mxge_attach(device_t dev) if_initbaudrate(ifp, IF_Gbps(10)); ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | - IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6; -#ifdef INET + IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | + IFCAP_RXCSUM_IPV6; +#if defined(INET) || defined(INET6) ifp->if_capabilities |= IFCAP_LRO; #endif @@ -4929,7 +4935,6 @@ mxge_attach(device_t dev) ifp->if_capenable = ifp->if_capabilities; if (sc->lro_cnt == 0) ifp->if_capenable &= ~IFCAP_LRO; - sc->csum_flag = 1; ifp->if_init = mxge_init; ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; diff --git a/sys/dev/mxge/if_mxge_var.h b/sys/dev/mxge/if_mxge_var.h index 9393577..7ca1f39 100644 --- a/sys/dev/mxge/if_mxge_var.h +++ b/sys/dev/mxge/if_mxge_var.h @@ -194,31 +194,6 @@ typedef struct char mtx_name[16]; } mxge_tx_ring_t; -struct lro_entry; -struct lro_entry -{ - SLIST_ENTRY(lro_entry) next; - struct mbuf *m_head; - struct mbuf *m_tail; - int timestamp; - struct ip *ip; - uint32_t tsval; - uint32_t tsecr; - uint32_t source_ip; - uint32_t dest_ip; - uint32_t next_seq; - uint32_t ack_seq; - uint32_t len; - uint32_t data_csum; - uint16_t window; - uint16_t source_port; - uint16_t dest_port; - uint16_t append_cnt; - uint16_t mss; - -}; -SLIST_HEAD(lro_head, lro_entry); - struct mxge_softc; typedef struct mxge_softc mxge_softc_t; @@ -236,11 +211,7 @@ struct mxge_slice_state { u_long omcasts; u_long oerrors; int if_drv_flags; - struct lro_head lro_active; - struct lro_head lro_free; - int lro_queued; - int lro_flushed; - int lro_bad_csum; + struct lro_ctrl lc; mxge_dma_t fw_stats_dma; struct sysctl_oid *sysctl_tree; struct sysctl_ctx_list sysctl_ctx; @@ -250,7 +221,6 @@ struct mxge_slice_state { struct mxge_softc { struct ifnet* ifp; struct mxge_slice_state *ss; - int csum_flag; /* rx_csums? */ int tx_boundary; /* boundary transmits cannot cross*/ int lro_cnt; bus_dma_tag_t parent_dmat; diff --git a/sys/dev/mxge/mxge_lro.c b/sys/dev/mxge/mxge_lro.c deleted file mode 100644 index b313059..0000000 --- a/sys/dev/mxge/mxge_lro.c +++ /dev/null @@ -1,357 +0,0 @@ -/****************************************************************************** - -Copyright (c) 2007-2008, Myricom Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Neither the name of the Myricom Inc, nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -***************************************************************************/ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/endian.h> -#include <sys/mbuf.h> -#include <sys/kernel.h> -#include <sys/socket.h> -#include <sys/sysctl.h> -#include <sys/bus.h> - -#include <net/if.h> -#include <net/ethernet.h> -#include <net/if_media.h> - -#include <netinet/in_systm.h> -#include <netinet/in.h> -#include <netinet/ip.h> -#include <netinet/tcp.h> - -#include <machine/bus.h> -#include <machine/in_cksum.h> - -#include <dev/mxge/mxge_mcp.h> -#include <dev/mxge/if_mxge_var.h> - -#include "opt_inet.h" - -#ifdef INET - -/* Assume len is a multiple of 4 */ -static uint16_t -mxge_csum_generic(uint16_t *raw, int len) -{ - uint32_t csum; - csum = 0; - while (len > 0) { - csum += *raw; - raw++; - csum += *raw; - raw++; - len -= 4; - } - csum = (csum >> 16) + (csum & 0xffff); - csum = (csum >> 16) + (csum & 0xffff); - return (uint16_t)csum; -} - - -void -mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro) -{ - mxge_softc_t *mgp = ss->sc; - struct ifnet *ifp; - struct ip *ip; - struct tcphdr *tcp; - uint32_t *ts_ptr; - uint32_t tcplen, tcp_csum; - - if (lro->append_cnt) { - /* incorporate the new len into the ip header and - * re-calculate the checksum */ - ip = lro->ip; - ip->ip_len = htons(lro->len - ETHER_HDR_LEN); - ip->ip_sum = 0; - ip->ip_sum = 0xffff ^ - mxge_csum_generic((uint16_t*)ip, - sizeof (*ip)); - - lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED | - CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - lro->m_head->m_pkthdr.csum_data = 0xffff; - lro->m_head->m_pkthdr.len = lro->len; - - /* incorporate the latest ack into the tcp header */ - tcp = (struct tcphdr *) (ip + 1); - tcp->th_ack = lro->ack_seq; - tcp->th_win = lro->window; - /* incorporate latest timestamp into the tcp header */ - if (lro->timestamp) { - ts_ptr = (uint32_t *)(tcp + 1); - ts_ptr[1] = htonl(lro->tsval); - ts_ptr[2] = lro->tsecr; - } - /* - * update checksum in tcp header by re-calculating the - * tcp pseudoheader checksum, and adding it to the checksum - * of the tcp payload data - */ - tcp->th_sum = 0; - tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN; - tcp_csum = lro->data_csum; - tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons(tcplen + IPPROTO_TCP)); - tcp_csum += mxge_csum_generic((uint16_t*)tcp, - tcp->th_off << 2); - tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16); - tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16); -#if 0 - IOLog("pseudo = 0x%x, generic = 0x%x, sum = %x\n", - in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons(tcplen + IPPROTO_TCP)), - mxge_csum_generic((uint16_t*)tcp, - tcp->th_off << 2), - htons(0xffff ^ tcp_csum)); -#endif - tcp->th_sum = 0xffff ^ tcp_csum; - } - ifp = mgp->ifp; - (*ifp->if_input)(mgp->ifp, lro->m_head); - ss->lro_queued += lro->append_cnt + 1; - ss->lro_flushed++; - lro->m_head = NULL; - lro->timestamp = 0; - lro->append_cnt = 0; - SLIST_INSERT_HEAD(&ss->lro_free, lro, next); -} - -int -mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum) -{ - struct ether_header *eh; - struct ip *ip; - struct tcphdr *tcp; - uint32_t *ts_ptr; - struct mbuf *m_nxt, *m_tail; - struct lro_entry *lro; - int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len; - int opt_bytes, trim; - uint32_t seq, tmp_csum, device_mtu; - - eh = mtod(m_head, struct ether_header *); - if (eh->ether_type != htons(ETHERTYPE_IP)) - return 1; - ip = (struct ip *) (eh + 1); - if (ip->ip_p != IPPROTO_TCP) - return 1; - - /* ensure there are no options */ - if ((ip->ip_hl << 2) != sizeof (*ip)) - return -1; - - /* .. and the packet is not fragmented */ - if (ip->ip_off & htons(IP_MF|IP_OFFMASK)) - return -1; - - /* verify that the IP header checksum is correct */ - tmp_csum = mxge_csum_generic((uint16_t *)ip, sizeof (*ip)); - if (__predict_false((tmp_csum ^ 0xffff) != 0)) { - ss->lro_bad_csum++; - return -1; - } - - /* find the TCP header */ - tcp = (struct tcphdr *) (ip + 1); - - /* ensure no bits set besides ack or psh */ - if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0) - return -1; - - /* check for timestamps. Since the only option we handle are - timestamps, we only have to handle the simple case of - aligned timestamps */ - - opt_bytes = (tcp->th_off << 2) - sizeof (*tcp); - tcp_hdr_len = sizeof (*tcp) + opt_bytes; - ts_ptr = (uint32_t *)(tcp + 1); - if (opt_bytes != 0) { - if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) || - (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))) - return -1; - } - - ip_len = ntohs(ip->ip_len); - tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip); - - - /* - * If frame is padded beyond the end of the IP packet, - * then we must trim the extra bytes off the end. - */ - tot_len = m_head->m_pkthdr.len; - trim = tot_len - (ip_len + ETHER_HDR_LEN); - if (trim != 0) { - if (trim < 0) { - /* truncated packet */ - return -1; - } - m_adj(m_head, -trim); - tot_len = m_head->m_pkthdr.len; - } - - m_nxt = m_head; - m_tail = NULL; /* -Wuninitialized */ - while (m_nxt != NULL) { - m_tail = m_nxt; - m_nxt = m_tail->m_next; - } - - hlen = ip_len + ETHER_HDR_LEN - tcp_data_len; - seq = ntohl(tcp->th_seq); - - SLIST_FOREACH(lro, &ss->lro_active, next) { - if (lro->source_port == tcp->th_sport && - lro->dest_port == tcp->th_dport && - lro->source_ip == ip->ip_src.s_addr && - lro->dest_ip == ip->ip_dst.s_addr) { - /* Try to append it */ - - if (__predict_false(seq != lro->next_seq || - (tcp_data_len == 0 && - lro->ack_seq == tcp->th_ack))) { - /* out of order packet or dup ack */ - SLIST_REMOVE(&ss->lro_active, lro, - lro_entry, next); - mxge_lro_flush(ss, lro); - return -1; - } - - if (opt_bytes) { - uint32_t tsval = ntohl(*(ts_ptr + 1)); - /* make sure timestamp values are increasing */ - if (__predict_false(lro->tsval > tsval || - *(ts_ptr + 2) == 0)) { - return -1; - } - lro->tsval = tsval; - lro->tsecr = *(ts_ptr + 2); - } - - lro->next_seq += tcp_data_len; - lro->ack_seq = tcp->th_ack; - lro->window = tcp->th_win; - lro->append_cnt++; - if (tcp_data_len == 0) { - m_freem(m_head); - return 0; - } - /* subtract off the checksum of the tcp header - * from the hardware checksum, and add it to the - * stored tcp data checksum. Byteswap the checksum - * if the total length so far is odd - */ - tmp_csum = mxge_csum_generic((uint16_t*)tcp, - tcp_hdr_len); - csum = csum + (tmp_csum ^ 0xffff); - csum = (csum & 0xffff) + (csum >> 16); - csum = (csum & 0xffff) + (csum >> 16); - if (lro->len & 0x1) { - /* Odd number of bytes so far, flip bytes */ - csum = ((csum << 8) | (csum >> 8)) & 0xffff; - } - csum = csum + lro->data_csum; - csum = (csum & 0xffff) + (csum >> 16); - csum = (csum & 0xffff) + (csum >> 16); - lro->data_csum = csum; - - lro->len += tcp_data_len; - - /* adjust mbuf so that m->m_data points to - the first byte of the payload */ - m_adj(m_head, hlen); - /* append mbuf chain */ - lro->m_tail->m_next = m_head; - /* advance the last pointer */ - lro->m_tail = m_tail; - /* flush packet if required */ - device_mtu = ss->sc->ifp->if_mtu; - if (lro->len > (65535 - device_mtu)) { - SLIST_REMOVE(&ss->lro_active, lro, - lro_entry, next); - mxge_lro_flush(ss, lro); - } - return 0; - } - } - - if (SLIST_EMPTY(&ss->lro_free)) - return -1; - - /* start a new chain */ - lro = SLIST_FIRST(&ss->lro_free); - SLIST_REMOVE_HEAD(&ss->lro_free, next); - SLIST_INSERT_HEAD(&ss->lro_active, lro, next); - lro->source_port = tcp->th_sport; - lro->dest_port = tcp->th_dport; - lro->source_ip = ip->ip_src.s_addr; - lro->dest_ip = ip->ip_dst.s_addr; - lro->next_seq = seq + tcp_data_len; - lro->mss = tcp_data_len; - lro->ack_seq = tcp->th_ack; - lro->window = tcp->th_win; - - /* save the checksum of just the TCP payload by - * subtracting off the checksum of the TCP header from - * the entire hardware checksum - * Since IP header checksum is correct, checksum over - * the IP header is -0. Substracting -0 is unnecessary. - */ - tmp_csum = mxge_csum_generic((uint16_t*)tcp, tcp_hdr_len); - csum = csum + (tmp_csum ^ 0xffff); - csum = (csum & 0xffff) + (csum >> 16); - csum = (csum & 0xffff) + (csum >> 16); - lro->data_csum = csum; - - lro->ip = ip; - /* record timestamp if it is present */ - if (opt_bytes) { - lro->timestamp = 1; - lro->tsval = ntohl(*(ts_ptr + 1)); - lro->tsecr = *(ts_ptr + 2); - } - lro->len = tot_len; - lro->m_head = m_head; - lro->m_tail = m_tail; - return 0; -} - -#endif /* INET */ -/* - This file uses Myri10GE driver indentation. - - Local Variables: - c-file-style:"linux" - tab-width:8 - End: -*/ |