summary | refs | log | tree | commit | diff | stats
path: root/sys/netinet
diff options
context:
space:
mode:
author: hselasky <hselasky@FreeBSD.org> 2014-11-03 12:38:29 +0000
committer: hselasky <hselasky@FreeBSD.org> 2014-11-03 12:38:29 +0000
commit: fa183f01741aa54ff3ba0fcf31b7b1404b7a7e53 (patch)
tree: 1bba595c897541325069c2ad2bb01c04241c25c0 /sys/netinet
parent: 8dd879bfaf9ef726ec0754bd1c4368f02df4f798 (diff)
download: FreeBSD-src-fa183f01741aa54ff3ba0fcf31b7b1404b7a7e53.zip
download: FreeBSD-src-fa183f01741aa54ff3ba0fcf31b7b1404b7a7e53.tar.gz
MFC r271946 and r272595:
Improve transmit sending offload, TSO, algorithm in general. This change allows all HCAs from Mellanox Technologies to function properly when TSO is enabled. See r271946 and r272595 for more details about this commit.

Sponsored by: Mellanox Technologies
Diffstat (limited to 'sys/netinet')
-rw-r--r-- sys/netinet/tcp_input.c 2
-rw-r--r-- sys/netinet/tcp_output.c 106
-rw-r--r-- sys/netinet/tcp_subr.c 4
-rw-r--r-- sys/netinet/tcp_var.h 8
4 files changed, 107 insertions, 13 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index d8d550a..92b0d58 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -3623,6 +3623,8 @@ tcp_mss(struct tcpcb *tp, int offer)
if (cap.ifcap & CSUM_TSO) {
tp->t_flags |= TF_TSO;
tp->t_tsomax = cap.tsomax;
+ tp->t_tsomaxsegcount = cap.tsomaxsegcount;
+ tp->t_tsomaxsegsize = cap.tsomaxsegsize;
}
}
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 8abb16d..c0e2230 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -774,28 +774,112 @@ send:
flags &= ~TH_FIN;
if (tso) {
+ u_int if_hw_tsomax;
+ u_int if_hw_tsomaxsegcount;
+ u_int if_hw_tsomaxsegsize;
+ struct mbuf *mb;
+ u_int moff;
+ int max_len;
+
+ /* extract TSO information */
+ if_hw_tsomax = tp->t_tsomax;
+ if_hw_tsomaxsegcount = tp->t_tsomaxsegcount;
+ if_hw_tsomaxsegsize = tp->t_tsomaxsegsize;
+
+ /*
+ * Limit a TSO burst to prevent it from
+ * overflowing or exceeding the maximum length
+ * allowed by the network interface:
+ */
KASSERT(ipoptlen == 0,
("%s: TSO can't do IP options", __func__));
/*
- * Limit a burst to t_tsomax minus IP,
- * TCP and options length to keep ip->ip_len
- * from overflowing or exceeding the maximum
- * length allowed by the network interface.
+ * Check if we should limit by maximum payload
+ * length:
*/
- if (len > tp->t_tsomax - hdrlen) {
- len = tp->t_tsomax - hdrlen;
- sendalot = 1;
+ if (if_hw_tsomax != 0) {
+ /* compute maximum TSO length */
+ max_len = (if_hw_tsomax - hdrlen);
+ if (max_len <= 0) {
+ len = 0;
+ } else if (len > (u_int)max_len) {
+ sendalot = 1;
+ len = (u_int)max_len;
+ }
+ }
+
+ /*
+ * Check if we should limit by maximum segment
+ * size and count:
+ */
+ if (if_hw_tsomaxsegcount != 0 &&
+ if_hw_tsomaxsegsize != 0) {
+ max_len = 0;
+ mb = sbsndmbuf(&so->so_snd, off, &moff);
+
+ while (mb != NULL && (u_int)max_len < len) {
+ u_int mlen;
+ u_int frags;
+
+ /*
+ * Get length of mbuf fragment
+ * and how many hardware frags,
+ * rounded up, it would use:
+ */
+ mlen = (mb->m_len - moff);
+ frags = howmany(mlen,
+ if_hw_tsomaxsegsize);
+
+ /* Handle special case: Zero Length Mbuf */
+ if (frags == 0)
+ frags = 1;
+
+ /*
+ * Check if the fragment limit
+ * will be reached or exceeded:
+ */
+ if (frags >= if_hw_tsomaxsegcount) {
+ max_len += min(mlen,
+ if_hw_tsomaxsegcount *
+ if_hw_tsomaxsegsize);
+ break;
+ }
+ max_len += mlen;
+ if_hw_tsomaxsegcount -= frags;
+ moff = 0;
+ mb = mb->m_next;
+ }
+ if (max_len <= 0) {
+ len = 0;
+ } else if (len > (u_int)max_len) {
+ sendalot = 1;
+ len = (u_int)max_len;
+ }
}
/*
* Prevent the last segment from being
- * fractional unless the send sockbuf can
- * be emptied.
+ * fractional unless the send sockbuf can be
+ * emptied:
+ */
+ max_len = (tp->t_maxopd - optlen);
+ if ((off + len) < so->so_snd.sb_cc) {
+ moff = len % (u_int)max_len;
+ if (moff != 0) {
+ len -= moff;
+ sendalot = 1;
+ }
+ }
+
+ /*
+ * In case there are too many small fragments
+ * don't use TSO:
*/
- if (sendalot && off + len < so->so_snd.sb_cc) {
- len -= len % (tp->t_maxopd - optlen);
+ if (len <= (u_int)max_len) {
+ len = (u_int)max_len;
sendalot = 1;
+ tso = 0;
}
/*
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 0c15314..c492de7 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -1808,6 +1808,8 @@ tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap)
ifp->if_hwassist & CSUM_TSO) {
cap->ifcap |= CSUM_TSO;
cap->tsomax = ifp->if_hw_tsomax;
+ cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
+ cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
}
}
RTFREE(sro.ro_rt);
@@ -1847,6 +1849,8 @@ tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap)
ifp->if_hwassist & CSUM_TSO) {
cap->ifcap |= CSUM_TSO;
cap->tsomax = ifp->if_hw_tsomax;
+ cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
+ cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
}
}
RTFREE(sro6.ro_rt);
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 10b1096..72bb7ae 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -208,13 +208,15 @@ struct tcpcb {
u_int t_keepintvl; /* interval between keepalives */
u_int t_keepcnt; /* number of keepalives before close */
- u_int t_tsomax; /* tso burst length limit */
+ u_int t_tsomax; /* TSO total burst length limit in bytes */
u_int t_pmtud_saved_maxopd; /* pre-blackhole MSS */
u_int t_flags2; /* More tcpcb flags storage */
uint32_t t_ispare[6]; /* 5 UTO, 1 TBD */
void *t_pspare2[4]; /* 1 TCP_SIGNATURE, 3 TBD */
- uint64_t _pad[6]; /* 6 TBD (1-2 CC/RTT?) */
+ uint64_t _pad[5]; /* 5 TBD (1-2 CC/RTT?) */
+ uint32_t t_tsomaxsegcount; /* TSO maximum segment count */
+ uint32_t t_tsomaxsegsize; /* TSO maximum segment size in bytes */
};
/*
@@ -342,6 +344,8 @@ struct hc_metrics_lite { /* must stay in sync with hc_metrics */
struct tcp_ifcap {
int ifcap;
u_int tsomax;
+ u_int tsomaxsegcount;
+ u_int tsomaxsegsize;
};
#ifndef _NETINET_IN_PCB_H_
OpenPOWER on IntegriCloud