summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author andre <andre@FreeBSD.org> 2006-09-06 21:51:59 +0000
committer andre <andre@FreeBSD.org> 2006-09-06 21:51:59 +0000
commit cb05913fd251edc3d35bcbeca73a8b681e2e58e8 (patch)
tree b5d4180eab9fff2a64a310b178d7e69374bbdacf
parent 0dddb6a1cc063906ce418aa1a75805e68b4ec971 (diff)
download FreeBSD-src-cb05913fd251edc3d35bcbeca73a8b681e2e58e8.zip
download FreeBSD-src-cb05913fd251edc3d35bcbeca73a8b681e2e58e8.tar.gz
First step of TSO (TCP segmentation offload) support in our network stack.
o add IFCAP_TSO[46] for drivers to announce this capability for IPv4 and IPv6
o add CSUM_TSO flag to mbuf pkthdr csum_flags field
o add tso_segsz field to mbuf pkthdr
o enhance ip_output() packet length check to allow for large TSO packets
o extend tcp_maxmtu[46]() with a flag pointer to pass interface capabilities
o adjust all callers of tcp_maxmtu[46]() accordingly

Discussed on: -current, -net
Sponsored by: TCP/IP Optimization Fundraise 2005
-rw-r--r-- sys/net/if.h 3
-rw-r--r-- sys/netinet/ip_output.c 23
-rw-r--r-- sys/netinet/tcp_input.c 13
-rw-r--r-- sys/netinet/tcp_reass.c 13
-rw-r--r-- sys/netinet/tcp_subr.c 24
-rw-r--r-- sys/netinet/tcp_timewait.c 24
-rw-r--r-- sys/netinet/tcp_var.h 5
-rw-r--r-- sys/netinet6/icmp6.c 2
-rw-r--r-- sys/sys/mbuf.h 2
9 files changed, 82 insertions, 27 deletions
diff --git a/sys/net/if.h b/sys/net/if.h
index b989890..cecb123 100644
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -206,8 +206,11 @@ struct if_data {
#define IFCAP_JUMBO_MTU 0x0020 /* 9000 byte MTU supported */
#define IFCAP_POLLING 0x0040 /* driver supports polling */
#define IFCAP_VLAN_HWCSUM 0x0080 /* can do IFCAP_HWCSUM on VLANs */
+#define IFCAP_TSO4 0x0100 /* can do TCP Segmentation Offload */
+#define IFCAP_TSO6 0x0200 /* can do TCP6 Segmentation Offload */
#define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM)
+#define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6)
#define IFQ_MAXLEN 50
#define IFNET_SLOWHZ 1 /* granularity is 1 second */
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index 79d3806..8311732 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -495,19 +495,29 @@ passout:
/*
* If small enough for interface, or the interface will take
- * care of the fragmentation for us, can just send directly.
+ * care of the fragmentation for us, we can just send directly.
*/
- if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT &&
- ((ip->ip_off & IP_DF) == 0))) {
+ if (ip->ip_len <= ifp->if_mtu ||
+ (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
+ ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP)
ip->ip_sum = in_cksum(m, hlen);
- /* Record statistics for this interface address. */
+ /*
+ * Record statistics for this interface address.
+ * With CSUM_TSO the byte/packet count will be slightly
+ * incorrect because we count the IP+TCP headers only
+ * once instead of for every generated packet.
+ */
if (!(flags & IP_FORWARDING) && ia) {
- ia->ia_ifa.if_opackets++;
+ if (m->m_pkthdr.csum_flags & CSUM_TSO)
+ ia->ia_ifa.if_opackets +=
+ m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
+ else
+ ia->ia_ifa.if_opackets++;
ia->ia_ifa.if_obytes += m->m_pkthdr.len;
}
#ifdef IPSEC
@@ -529,7 +539,8 @@ passout:
goto done;
}
- if (ip->ip_off & IP_DF) {
+ /* Balk when DF bit is set or the interface didn't support TSO. */
+ if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
error = EMSGSIZE;
/*
* This case can happen if the user changed the MTU
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 86f9325..0699193 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -2857,6 +2857,7 @@ tcp_mss(tp, offer)
struct socket *so;
struct hc_metrics_lite metrics;
int origoffer = offer;
+ int mtuflags = 0;
#ifdef INET6
int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
size_t min_protoh = isipv6 ?
@@ -2869,12 +2870,12 @@ tcp_mss(tp, offer)
/* initialize */
#ifdef INET6
if (isipv6) {
- maxmtu = tcp_maxmtu6(&inp->inp_inc);
+ maxmtu = tcp_maxmtu6(&inp->inp_inc, &mtuflags);
tp->t_maxopd = tp->t_maxseg = tcp_v6mssdflt;
} else
#endif
{
- maxmtu = tcp_maxmtu(&inp->inp_inc);
+ maxmtu = tcp_maxmtu(&inp->inp_inc, &mtuflags);
tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
}
so = inp->inp_socket;
@@ -3081,6 +3082,10 @@ tcp_mss(tp, offer)
tp->snd_cwnd = mss * ss_fltsz_local;
else
tp->snd_cwnd = mss * ss_fltsz;
+
+ /* Check the interface for TSO capabilities. */
+ if (mtuflags & CSUM_TSO)
+ tp->t_flags |= TF_TSO;
}
/*
@@ -3103,14 +3108,14 @@ tcp_mssopt(inc)
#ifdef INET6
if (isipv6) {
mss = tcp_v6mssdflt;
- maxmtu = tcp_maxmtu6(inc);
+ maxmtu = tcp_maxmtu6(inc, NULL);
thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
} else
#endif
{
mss = tcp_mssdflt;
- maxmtu = tcp_maxmtu(inc);
+ maxmtu = tcp_maxmtu(inc, NULL);
thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct tcpiphdr);
}
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
index 86f9325..0699193 100644
--- a/sys/netinet/tcp_reass.c
+++ b/sys/netinet/tcp_reass.c
@@ -2857,6 +2857,7 @@ tcp_mss(tp, offer)
struct socket *so;
struct hc_metrics_lite metrics;
int origoffer = offer;
+ int mtuflags = 0;
#ifdef INET6
int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
size_t min_protoh = isipv6 ?
@@ -2869,12 +2870,12 @@ tcp_mss(tp, offer)
/* initialize */
#ifdef INET6
if (isipv6) {
- maxmtu = tcp_maxmtu6(&inp->inp_inc);
+ maxmtu = tcp_maxmtu6(&inp->inp_inc, &mtuflags);
tp->t_maxopd = tp->t_maxseg = tcp_v6mssdflt;
} else
#endif
{
- maxmtu = tcp_maxmtu(&inp->inp_inc);
+ maxmtu = tcp_maxmtu(&inp->inp_inc, &mtuflags);
tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
}
so = inp->inp_socket;
@@ -3081,6 +3082,10 @@ tcp_mss(tp, offer)
tp->snd_cwnd = mss * ss_fltsz_local;
else
tp->snd_cwnd = mss * ss_fltsz;
+
+ /* Check the interface for TSO capabilities. */
+ if (mtuflags & CSUM_TSO)
+ tp->t_flags |= TF_TSO;
}
/*
@@ -3103,14 +3108,14 @@ tcp_mssopt(inc)
#ifdef INET6
if (isipv6) {
mss = tcp_v6mssdflt;
- maxmtu = tcp_maxmtu6(inc);
+ maxmtu = tcp_maxmtu6(inc, NULL);
thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
} else
#endif
{
mss = tcp_mssdflt;
- maxmtu = tcp_maxmtu(inc);
+ maxmtu = tcp_maxmtu(inc, NULL);
thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct tcpiphdr);
}
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 22a5e26..39acf62 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -1252,7 +1252,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
* or route MTU. tcp_mtudisc()
* will do right thing by itself.
*/
- if (mtu <= tcp_maxmtu(&inc))
+ if (mtu <= tcp_maxmtu(&inc, NULL))
tcp_hc_updatemtu(&inc, mtu);
}
@@ -1531,9 +1531,9 @@ tcp_mtudisc(struct inpcb *inp, int errno)
maxmtu = tcp_hc_getmtu(&inp->inp_inc); /* IPv4 and IPv6 */
romtu =
#ifdef INET6
- isipv6 ? tcp_maxmtu6(&inp->inp_inc) :
+ isipv6 ? tcp_maxmtu6(&inp->inp_inc, NULL) :
#endif /* INET6 */
- tcp_maxmtu(&inp->inp_inc);
+ tcp_maxmtu(&inp->inp_inc, NULL);
if (!maxmtu)
maxmtu = romtu;
else
@@ -1610,7 +1610,7 @@ tcp_mtudisc(struct inpcb *inp, int errno)
* to get the interface MTU.
*/
u_long
-tcp_maxmtu(struct in_conninfo *inc)
+tcp_maxmtu(struct in_conninfo *inc, int *flags)
{
struct route sro;
struct sockaddr_in *dst;
@@ -1633,6 +1633,13 @@ tcp_maxmtu(struct in_conninfo *inc)
maxmtu = ifp->if_mtu;
else
maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+
+ /* Report additional interface capabilities. */
+ if (flags != NULL) {
+ if (ifp->if_capenable & IFCAP_TSO4 &&
+ ifp->if_hwassist & CSUM_TSO)
+ *flags |= CSUM_TSO;
+ }
RTFREE(sro.ro_rt);
}
return (maxmtu);
@@ -1640,7 +1647,7 @@ tcp_maxmtu(struct in_conninfo *inc)
#ifdef INET6
u_long
-tcp_maxmtu6(struct in_conninfo *inc)
+tcp_maxmtu6(struct in_conninfo *inc, int *flags)
{
struct route_in6 sro6;
struct ifnet *ifp;
@@ -1662,6 +1669,13 @@ tcp_maxmtu6(struct in_conninfo *inc)
else
maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu,
IN6_LINKMTU(sro6.ro_rt->rt_ifp));
+
+ /* Report additional interface capabilities. */
+ if (flags != NULL) {
+ if (ifp->if_capenable & IFCAP_TSO6 &&
+ ifp->if_hwassist & CSUM_TSO)
+ *flags |= CSUM_TSO;
+ }
RTFREE(sro6.ro_rt);
}
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
index 22a5e26..39acf62 100644
--- a/sys/netinet/tcp_timewait.c
+++ b/sys/netinet/tcp_timewait.c
@@ -1252,7 +1252,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
* or route MTU. tcp_mtudisc()
* will do right thing by itself.
*/
- if (mtu <= tcp_maxmtu(&inc))
+ if (mtu <= tcp_maxmtu(&inc, NULL))
tcp_hc_updatemtu(&inc, mtu);
}
@@ -1531,9 +1531,9 @@ tcp_mtudisc(struct inpcb *inp, int errno)
maxmtu = tcp_hc_getmtu(&inp->inp_inc); /* IPv4 and IPv6 */
romtu =
#ifdef INET6
- isipv6 ? tcp_maxmtu6(&inp->inp_inc) :
+ isipv6 ? tcp_maxmtu6(&inp->inp_inc, NULL) :
#endif /* INET6 */
- tcp_maxmtu(&inp->inp_inc);
+ tcp_maxmtu(&inp->inp_inc, NULL);
if (!maxmtu)
maxmtu = romtu;
else
@@ -1610,7 +1610,7 @@ tcp_mtudisc(struct inpcb *inp, int errno)
* to get the interface MTU.
*/
u_long
-tcp_maxmtu(struct in_conninfo *inc)
+tcp_maxmtu(struct in_conninfo *inc, int *flags)
{
struct route sro;
struct sockaddr_in *dst;
@@ -1633,6 +1633,13 @@ tcp_maxmtu(struct in_conninfo *inc)
maxmtu = ifp->if_mtu;
else
maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+
+ /* Report additional interface capabilities. */
+ if (flags != NULL) {
+ if (ifp->if_capenable & IFCAP_TSO4 &&
+ ifp->if_hwassist & CSUM_TSO)
+ *flags |= CSUM_TSO;
+ }
RTFREE(sro.ro_rt);
}
return (maxmtu);
@@ -1640,7 +1647,7 @@ tcp_maxmtu(struct in_conninfo *inc)
#ifdef INET6
u_long
-tcp_maxmtu6(struct in_conninfo *inc)
+tcp_maxmtu6(struct in_conninfo *inc, int *flags)
{
struct route_in6 sro6;
struct ifnet *ifp;
@@ -1662,6 +1669,13 @@ tcp_maxmtu6(struct in_conninfo *inc)
else
maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu,
IN6_LINKMTU(sro6.ro_rt->rt_ifp));
+
+ /* Report additional interface capabilities. */
+ if (flags != NULL) {
+ if (ifp->if_capenable & IFCAP_TSO6 &&
+ ifp->if_hwassist & CSUM_TSO)
+ *flags |= CSUM_TSO;
+ }
RTFREE(sro6.ro_rt);
}
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 0831724..4aa5cda 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -114,6 +114,7 @@ struct tcpcb {
#define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */
#define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */
#define TF_FORCEDATA 0x800000 /* force out a byte */
+#define TF_TSO 0x1000000 /* TSO enabled on this connection */
tcp_seq snd_una; /* send unacknowledged */
tcp_seq snd_max; /* highest sequence number sent;
@@ -508,8 +509,8 @@ void tcp_init(void);
void tcp_fini(void *);
void tcp_reass_init(void);
void tcp_input(struct mbuf *, int);
-u_long tcp_maxmtu(struct in_conninfo *);
-u_long tcp_maxmtu6(struct in_conninfo *);
+u_long tcp_maxmtu(struct in_conninfo *, int *);
+u_long tcp_maxmtu6(struct in_conninfo *, int *);
void tcp_mss(struct tcpcb *, int);
int tcp_mssopt(struct in_conninfo *);
struct inpcb *
diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c
index 470944c..21d4b3b 100644
--- a/sys/netinet6/icmp6.c
+++ b/sys/netinet6/icmp6.c
@@ -1132,7 +1132,7 @@ icmp6_mtudisc_update(ip6cp, validated)
if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL))
return;
- if (mtu < tcp_maxmtu6(&inc)) {
+ if (mtu < tcp_maxmtu6(&inc, NULL)) {
tcp_hc_updatemtu(&inc, mtu);
icmp6stat.icp6s_pmtuchg++;
}
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index eb0ff1e8..d3d03ce 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -110,6 +110,7 @@ struct pkthdr {
/* variables for hardware checksum */
int csum_flags; /* flags regarding checksum */
int csum_data; /* data field used by csum routines */
+ u_int16_t tso_segsz; /* TSO segment size */
SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
};
@@ -216,6 +217,7 @@ struct mbuf {
#define CSUM_UDP 0x0004 /* will csum UDP */
#define CSUM_IP_FRAGS 0x0008 /* will csum IP fragments */
#define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */
+#define CSUM_TSO 0x0020 /* will do TSO */
#define CSUM_IP_CHECKED 0x0100 /* did csum IP */
#define CSUM_IP_VALID 0x0200 /* ... the csum is valid */
OpenPOWER on IntegriCloud