diff options
author | andre <andre@FreeBSD.org> | 2006-09-06 21:51:59 +0000 |
---|---|---|
committer | andre <andre@FreeBSD.org> | 2006-09-06 21:51:59 +0000 |
commit | cb05913fd251edc3d35bcbeca73a8b681e2e58e8 (patch) | |
tree | b5d4180eab9fff2a64a310b178d7e69374bbdacf | |
parent | 0dddb6a1cc063906ce418aa1a75805e68b4ec971 (diff) | |
download | FreeBSD-src-cb05913fd251edc3d35bcbeca73a8b681e2e58e8.zip FreeBSD-src-cb05913fd251edc3d35bcbeca73a8b681e2e58e8.tar.gz |
First step of TSO (TCP segmentation offload) support in our network stack.
o add IFCAP_TSO[46] for drivers to announce this capability for IPv4 and IPv6
o add CSUM_TSO flag to mbuf pkthdr csum_flags field
o add tso_segsz field to mbuf pkthdr
o enhance ip_output() packet length check to allow for large TSO packets
o extend tcp_maxmtu[46]() with a flag pointer to pass interface capabilities
o adjust all callers of tcp_maxmtu[46]() accordingly
Discussed on: -current, -net
Sponsored by: TCP/IP Optimization Fundraise 2005
-rw-r--r-- | sys/net/if.h | 3 |
-rw-r--r-- | sys/netinet/ip_output.c | 23 |
-rw-r--r-- | sys/netinet/tcp_input.c | 13 |
-rw-r--r-- | sys/netinet/tcp_reass.c | 13 |
-rw-r--r-- | sys/netinet/tcp_subr.c | 24 |
-rw-r--r-- | sys/netinet/tcp_timewait.c | 24 |
-rw-r--r-- | sys/netinet/tcp_var.h | 5 |
-rw-r--r-- | sys/netinet6/icmp6.c | 2 |
-rw-r--r-- | sys/sys/mbuf.h | 2 |
9 files changed, 82 insertions, 27 deletions
diff --git a/sys/net/if.h b/sys/net/if.h index b989890..cecb123 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -206,8 +206,11 @@ struct if_data { #define IFCAP_JUMBO_MTU 0x0020 /* 9000 byte MTU supported */ #define IFCAP_POLLING 0x0040 /* driver supports polling */ #define IFCAP_VLAN_HWCSUM 0x0080 /* can do IFCAP_HWCSUM on VLANs */ +#define IFCAP_TSO4 0x0100 /* can do TCP Segmentation Offload */ +#define IFCAP_TSO6 0x0200 /* can do TCP6 Segmentation Offload */ #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) +#define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) #define IFQ_MAXLEN 50 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 79d3806..8311732 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -495,19 +495,29 @@ passout: /* * If small enough for interface, or the interface will take - * care of the fragmentation for us, can just send directly. + * care of the fragmentation for us, we can just send directly. */ - if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT && - ((ip->ip_off & IP_DF) == 0))) { + if (ip->ip_len <= ifp->if_mtu || + (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 || + ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) { ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) ip->ip_sum = in_cksum(m, hlen); - /* Record statistics for this interface address. */ + /* + * Record statistics for this interface address. + * With CSUM_TSO the byte/packet count will be slightly + * incorrect because we count the IP+TCP headers only + * once instead of for every generated packet. 
+ */ if (!(flags & IP_FORWARDING) && ia) { - ia->ia_ifa.if_opackets++; + if (m->m_pkthdr.csum_flags & CSUM_TSO) + ia->ia_ifa.if_opackets += + m->m_pkthdr.len / m->m_pkthdr.tso_segsz; + else + ia->ia_ifa.if_opackets++; ia->ia_ifa.if_obytes += m->m_pkthdr.len; } #ifdef IPSEC @@ -529,7 +539,8 @@ passout: goto done; } - if (ip->ip_off & IP_DF) { + /* Balk when DF bit is set or the interface didn't support TSO. */ + if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) { error = EMSGSIZE; /* * This case can happen if the user changed the MTU diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 86f9325..0699193 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -2857,6 +2857,7 @@ tcp_mss(tp, offer) struct socket *so; struct hc_metrics_lite metrics; int origoffer = offer; + int mtuflags = 0; #ifdef INET6 int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; size_t min_protoh = isipv6 ? @@ -2869,12 +2870,12 @@ tcp_mss(tp, offer) /* initialize */ #ifdef INET6 if (isipv6) { - maxmtu = tcp_maxmtu6(&inp->inp_inc); + maxmtu = tcp_maxmtu6(&inp->inp_inc, &mtuflags); tp->t_maxopd = tp->t_maxseg = tcp_v6mssdflt; } else #endif { - maxmtu = tcp_maxmtu(&inp->inp_inc); + maxmtu = tcp_maxmtu(&inp->inp_inc, &mtuflags); tp->t_maxopd = tp->t_maxseg = tcp_mssdflt; } so = inp->inp_socket; @@ -3081,6 +3082,10 @@ tcp_mss(tp, offer) tp->snd_cwnd = mss * ss_fltsz_local; else tp->snd_cwnd = mss * ss_fltsz; + + /* Check the interface for TSO capabilities. 
*/ + if (mtuflags & CSUM_TSO) + tp->t_flags |= TF_TSO; } /* @@ -3103,14 +3108,14 @@ tcp_mssopt(inc) #ifdef INET6 if (isipv6) { mss = tcp_v6mssdflt; - maxmtu = tcp_maxmtu6(inc); + maxmtu = tcp_maxmtu6(inc, NULL); thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */ min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); } else #endif { mss = tcp_mssdflt; - maxmtu = tcp_maxmtu(inc); + maxmtu = tcp_maxmtu(inc, NULL); thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */ min_protoh = sizeof(struct tcpiphdr); } diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index 86f9325..0699193 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -2857,6 +2857,7 @@ tcp_mss(tp, offer) struct socket *so; struct hc_metrics_lite metrics; int origoffer = offer; + int mtuflags = 0; #ifdef INET6 int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; size_t min_protoh = isipv6 ? @@ -2869,12 +2870,12 @@ tcp_mss(tp, offer) /* initialize */ #ifdef INET6 if (isipv6) { - maxmtu = tcp_maxmtu6(&inp->inp_inc); + maxmtu = tcp_maxmtu6(&inp->inp_inc, &mtuflags); tp->t_maxopd = tp->t_maxseg = tcp_v6mssdflt; } else #endif { - maxmtu = tcp_maxmtu(&inp->inp_inc); + maxmtu = tcp_maxmtu(&inp->inp_inc, &mtuflags); tp->t_maxopd = tp->t_maxseg = tcp_mssdflt; } so = inp->inp_socket; @@ -3081,6 +3082,10 @@ tcp_mss(tp, offer) tp->snd_cwnd = mss * ss_fltsz_local; else tp->snd_cwnd = mss * ss_fltsz; + + /* Check the interface for TSO capabilities. 
*/ + if (mtuflags & CSUM_TSO) + tp->t_flags |= TF_TSO; } /* @@ -3103,14 +3108,14 @@ tcp_mssopt(inc) #ifdef INET6 if (isipv6) { mss = tcp_v6mssdflt; - maxmtu = tcp_maxmtu6(inc); + maxmtu = tcp_maxmtu6(inc, NULL); thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */ min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); } else #endif { mss = tcp_mssdflt; - maxmtu = tcp_maxmtu(inc); + maxmtu = tcp_maxmtu(inc, NULL); thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */ min_protoh = sizeof(struct tcpiphdr); } diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 22a5e26..39acf62 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1252,7 +1252,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) * or route MTU. tcp_mtudisc() * will do right thing by itself. */ - if (mtu <= tcp_maxmtu(&inc)) + if (mtu <= tcp_maxmtu(&inc, NULL)) tcp_hc_updatemtu(&inc, mtu); } @@ -1531,9 +1531,9 @@ tcp_mtudisc(struct inpcb *inp, int errno) maxmtu = tcp_hc_getmtu(&inp->inp_inc); /* IPv4 and IPv6 */ romtu = #ifdef INET6 - isipv6 ? tcp_maxmtu6(&inp->inp_inc) : + isipv6 ? tcp_maxmtu6(&inp->inp_inc, NULL) : #endif /* INET6 */ - tcp_maxmtu(&inp->inp_inc); + tcp_maxmtu(&inp->inp_inc, NULL); if (!maxmtu) maxmtu = romtu; else @@ -1610,7 +1610,7 @@ tcp_mtudisc(struct inpcb *inp, int errno) * to get the interface MTU. */ u_long -tcp_maxmtu(struct in_conninfo *inc) +tcp_maxmtu(struct in_conninfo *inc, int *flags) { struct route sro; struct sockaddr_in *dst; @@ -1633,6 +1633,13 @@ tcp_maxmtu(struct in_conninfo *inc) maxmtu = ifp->if_mtu; else maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu); + + /* Report additional interface capabilities. 
*/ + if (flags != NULL) { + if (ifp->if_capenable & IFCAP_TSO4 && + ifp->if_hwassist & CSUM_TSO) + *flags |= CSUM_TSO; + } RTFREE(sro.ro_rt); } return (maxmtu); @@ -1640,7 +1647,7 @@ tcp_maxmtu(struct in_conninfo *inc) #ifdef INET6 u_long -tcp_maxmtu6(struct in_conninfo *inc) +tcp_maxmtu6(struct in_conninfo *inc, int *flags) { struct route_in6 sro6; struct ifnet *ifp; @@ -1662,6 +1669,13 @@ tcp_maxmtu6(struct in_conninfo *inc) else maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu, IN6_LINKMTU(sro6.ro_rt->rt_ifp)); + + /* Report additional interface capabilities. */ + if (flags != NULL) { + if (ifp->if_capenable & IFCAP_TSO6 && + ifp->if_hwassist & CSUM_TSO) + *flags |= CSUM_TSO; + } RTFREE(sro6.ro_rt); } diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 22a5e26..39acf62 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -1252,7 +1252,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) * or route MTU. tcp_mtudisc() * will do right thing by itself. */ - if (mtu <= tcp_maxmtu(&inc)) + if (mtu <= tcp_maxmtu(&inc, NULL)) tcp_hc_updatemtu(&inc, mtu); } @@ -1531,9 +1531,9 @@ tcp_mtudisc(struct inpcb *inp, int errno) maxmtu = tcp_hc_getmtu(&inp->inp_inc); /* IPv4 and IPv6 */ romtu = #ifdef INET6 - isipv6 ? tcp_maxmtu6(&inp->inp_inc) : + isipv6 ? tcp_maxmtu6(&inp->inp_inc, NULL) : #endif /* INET6 */ - tcp_maxmtu(&inp->inp_inc); + tcp_maxmtu(&inp->inp_inc, NULL); if (!maxmtu) maxmtu = romtu; else @@ -1610,7 +1610,7 @@ tcp_mtudisc(struct inpcb *inp, int errno) * to get the interface MTU. */ u_long -tcp_maxmtu(struct in_conninfo *inc) +tcp_maxmtu(struct in_conninfo *inc, int *flags) { struct route sro; struct sockaddr_in *dst; @@ -1633,6 +1633,13 @@ tcp_maxmtu(struct in_conninfo *inc) maxmtu = ifp->if_mtu; else maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu); + + /* Report additional interface capabilities. 
*/ + if (flags != NULL) { + if (ifp->if_capenable & IFCAP_TSO4 && + ifp->if_hwassist & CSUM_TSO) + *flags |= CSUM_TSO; + } RTFREE(sro.ro_rt); } return (maxmtu); @@ -1640,7 +1647,7 @@ tcp_maxmtu(struct in_conninfo *inc) #ifdef INET6 u_long -tcp_maxmtu6(struct in_conninfo *inc) +tcp_maxmtu6(struct in_conninfo *inc, int *flags) { struct route_in6 sro6; struct ifnet *ifp; @@ -1662,6 +1669,13 @@ tcp_maxmtu6(struct in_conninfo *inc) else maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu, IN6_LINKMTU(sro6.ro_rt->rt_ifp)); + + /* Report additional interface capabilities. */ + if (flags != NULL) { + if (ifp->if_capenable & IFCAP_TSO6 && + ifp->if_hwassist & CSUM_TSO) + *flags |= CSUM_TSO; + } RTFREE(sro6.ro_rt); } diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 0831724..4aa5cda 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -114,6 +114,7 @@ struct tcpcb { #define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */ #define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */ #define TF_FORCEDATA 0x800000 /* force out a byte */ +#define TF_TSO 0x1000000 /* TSO enabled on this connection */ tcp_seq snd_una; /* send unacknowledged */ tcp_seq snd_max; /* highest sequence number sent; @@ -508,8 +509,8 @@ void tcp_init(void); void tcp_fini(void *); void tcp_reass_init(void); void tcp_input(struct mbuf *, int); -u_long tcp_maxmtu(struct in_conninfo *); -u_long tcp_maxmtu6(struct in_conninfo *); +u_long tcp_maxmtu(struct in_conninfo *, int *); +u_long tcp_maxmtu6(struct in_conninfo *, int *); void tcp_mss(struct tcpcb *, int); int tcp_mssopt(struct in_conninfo *); struct inpcb * diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 470944c..21d4b3b 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -1132,7 +1132,7 @@ icmp6_mtudisc_update(ip6cp, validated) if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL)) return; - if (mtu < tcp_maxmtu6(&inc)) { + if (mtu < tcp_maxmtu6(&inc, NULL)) { tcp_hc_updatemtu(&inc, mtu); 
icmp6stat.icp6s_pmtuchg++; } diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index eb0ff1e8..d3d03ce 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -110,6 +110,7 @@ struct pkthdr { /* variables for hardware checksum */ int csum_flags; /* flags regarding checksum */ int csum_data; /* data field used by csum routines */ + u_int16_t tso_segsz; /* TSO segment size */ SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ }; @@ -216,6 +217,7 @@ struct mbuf { #define CSUM_UDP 0x0004 /* will csum UDP */ #define CSUM_IP_FRAGS 0x0008 /* will csum IP fragments */ #define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */ +#define CSUM_TSO 0x0020 /* will do TSO */ #define CSUM_IP_CHECKED 0x0100 /* did csum IP */ #define CSUM_IP_VALID 0x0200 /* ... the csum is valid */ |