summaryrefslogtreecommitdiffstats
path: root/sys/netinet
diff options
context:
space:
mode:
authorwollman <wollman@FreeBSD.org>1996-03-22 18:09:21 +0000
committerwollman <wollman@FreeBSD.org>1996-03-22 18:09:21 +0000
commitacfe4c4467db308020c97e72d3b390ee773f337c (patch)
treeabd55e1ebb0eb00f663828297dcb5cd2f74c9bf4 /sys/netinet
parentc29bc64893c71ac25bb21b6ce4c543fa1a1b3755 (diff)
downloadFreeBSD-src-acfe4c4467db308020c97e72d3b390ee773f337c.zip
FreeBSD-src-acfe4c4467db308020c97e72d3b390ee773f337c.tar.gz
A number of performance-reducing flaws fixed based on comments
from Larry Peterson &co. at Arizona: - Header prediction for ACKs did not exclude Fast Retransmit/Recovery. - srtt calculation tended to get ``stuck'' and could never decrease when below 8. It still can't, but the scaling factors are adjusted so that this artifact does not cause as bad an effect on the RTO value as it used to. The paper also points out the incr/8 error that has been long since fixed, and the problems with ACKing frequency resulting from the use of options which I suspect to be fixed already as well (as part of the T/TCP work). Obtained from: Brakmo & Peterson, ``Performance Problems in BSD4.4 TCP''
Diffstat (limited to 'sys/netinet')
-rw-r--r--sys/netinet/tcp_input.c57
-rw-r--r--sys/netinet/tcp_reass.c57
-rw-r--r--sys/netinet/tcp_var.h24
3 files changed, 131 insertions, 7 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 177b98f..8c6928b 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
- * $Id: tcp_input.c,v 1.37 1996/02/26 21:47:10 guido Exp $
+ * $Id: tcp_input.c,v 1.38 1996/03/11 15:13:29 davidg Exp $
*/
#ifndef TUBA_INCLUDE
@@ -492,7 +492,8 @@ findpcb:
if (ti->ti_len == 0) {
if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
- tp->snd_cwnd >= tp->snd_wnd) {
+ tp->snd_cwnd >= tp->snd_wnd &&
+ tp->t_dupacks < tcprexmtthresh) {
/*
* this is a pure ack for outstanding data.
*/
@@ -1257,7 +1258,7 @@ trimthenstep6:
* If the congestion window was inflated to account
* for the other side's cached packets, retract it.
*/
- if (tp->t_dupacks > tcprexmtthresh &&
+ if (tp->t_dupacks >= tcprexmtthresh &&
tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_dupacks = 0;
@@ -1819,6 +1820,7 @@ tcp_xmit_timer(tp, rtt)
register struct tcpcb *tp;
short rtt;
{
+#ifdef notdef
register short delta;
tcpstat.tcps_rttupdated++;
@@ -1858,6 +1860,50 @@ tcp_xmit_timer(tp, rtt)
tp->t_srtt = rtt << TCP_RTT_SHIFT;
tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
}
+#else /* Peterson paper */
+ register int delta;
+
+ tcpstat.tcps_rttupdated++;
+ tp->t_rttupdated++;
+ if (tp->t_srtt != 0) {
+ /*
+ * srtt is stored as fixed point with 5 bits after the
+ * binary point (i.e., scaled by 8). The following magic
+ * is equivalent to the smoothing algorithm in rfc793 with
+ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
+ * point). Adjust rtt to origin 0.
+ */
+ delta = ((rtt - 1) << TCP_DELTA_SHIFT)
+ - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
+
+ if ((tp->t_srtt += delta) <= 0)
+ tp->t_srtt = 1;
+
+ /*
+ * We accumulate a smoothed rtt variance (actually, a
+ * smoothed mean difference), then set the retransmit
+ * timer to smoothed rtt + 4 times the smoothed variance.
+ * rttvar is stored as fixed point with 4 bits after the
+ * binary point (scaled by 16). The following is
+ * equivalent to rfc793 smoothing with an alpha of .75
+ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces
+ * rfc793's wired-in beta.
+ */
+ if (delta < 0)
+ delta = -delta;
+ delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
+ if ((tp->t_rttvar += delta) <= 0)
+ tp->t_rttvar = 1;
+ } else {
+ /*
+ * No rtt measurement yet - use the unsmoothed rtt.
+ * Set the variance to half the rtt (so our first
+ * retransmit happens at 3*rtt).
+ */
+ tp->t_srtt = rtt << TCP_RTT_SHIFT;
+ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
+ }
+#endif
tp->t_rtt = 0;
tp->t_rxtshift = 0;
@@ -1872,8 +1918,13 @@ tcp_xmit_timer(tp, rtt)
* statistical, we have to test that we don't drop below
* the minimum feasible timer (which is 2 ticks).
*/
+#ifdef notdef
TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
tp->t_rttmin, TCPTV_REXMTMAX);
+#else /* Peterson */
+ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+ max(tp->t_rttmin, TCPTV_MIN + rtt - 1), TCPTV_REXMTMAX);
+#endif
/*
* We received an ack for a packet that wasn't retransmitted;
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
index 177b98f..8c6928b 100644
--- a/sys/netinet/tcp_reass.c
+++ b/sys/netinet/tcp_reass.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
- * $Id: tcp_input.c,v 1.37 1996/02/26 21:47:10 guido Exp $
+ * $Id: tcp_input.c,v 1.38 1996/03/11 15:13:29 davidg Exp $
*/
#ifndef TUBA_INCLUDE
@@ -492,7 +492,8 @@ findpcb:
if (ti->ti_len == 0) {
if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
- tp->snd_cwnd >= tp->snd_wnd) {
+ tp->snd_cwnd >= tp->snd_wnd &&
+ tp->t_dupacks < tcprexmtthresh) {
/*
* this is a pure ack for outstanding data.
*/
@@ -1257,7 +1258,7 @@ trimthenstep6:
* If the congestion window was inflated to account
* for the other side's cached packets, retract it.
*/
- if (tp->t_dupacks > tcprexmtthresh &&
+ if (tp->t_dupacks >= tcprexmtthresh &&
tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_dupacks = 0;
@@ -1819,6 +1820,7 @@ tcp_xmit_timer(tp, rtt)
register struct tcpcb *tp;
short rtt;
{
+#ifdef notdef
register short delta;
tcpstat.tcps_rttupdated++;
@@ -1858,6 +1860,50 @@ tcp_xmit_timer(tp, rtt)
tp->t_srtt = rtt << TCP_RTT_SHIFT;
tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
}
+#else /* Peterson paper */
+ register int delta;
+
+ tcpstat.tcps_rttupdated++;
+ tp->t_rttupdated++;
+ if (tp->t_srtt != 0) {
+ /*
+ * srtt is stored as fixed point with 5 bits after the
+ * binary point (i.e., scaled by 8). The following magic
+ * is equivalent to the smoothing algorithm in rfc793 with
+ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
+ * point). Adjust rtt to origin 0.
+ */
+ delta = ((rtt - 1) << TCP_DELTA_SHIFT)
+ - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
+
+ if ((tp->t_srtt += delta) <= 0)
+ tp->t_srtt = 1;
+
+ /*
+ * We accumulate a smoothed rtt variance (actually, a
+ * smoothed mean difference), then set the retransmit
+ * timer to smoothed rtt + 4 times the smoothed variance.
+ * rttvar is stored as fixed point with 4 bits after the
+ * binary point (scaled by 16). The following is
+ * equivalent to rfc793 smoothing with an alpha of .75
+ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces
+ * rfc793's wired-in beta.
+ */
+ if (delta < 0)
+ delta = -delta;
+ delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
+ if ((tp->t_rttvar += delta) <= 0)
+ tp->t_rttvar = 1;
+ } else {
+ /*
+ * No rtt measurement yet - use the unsmoothed rtt.
+ * Set the variance to half the rtt (so our first
+ * retransmit happens at 3*rtt).
+ */
+ tp->t_srtt = rtt << TCP_RTT_SHIFT;
+ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
+ }
+#endif
tp->t_rtt = 0;
tp->t_rxtshift = 0;
@@ -1872,8 +1918,13 @@ tcp_xmit_timer(tp, rtt)
* statistical, we have to test that we don't drop below
* the minimum feasible timer (which is 2 ticks).
*/
+#ifdef notdef
TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
tp->t_rttmin, TCPTV_REXMTMAX);
+#else /* Peterson */
+ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+ max(tp->t_rttmin, TCPTV_MIN + rtt - 1), TCPTV_REXMTMAX);
+#endif
/*
* We received an ack for a packet that wasn't retransmitted;
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index d53251a0..3035143 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
- * $Id: tcp_var.h,v 1.29 1996/02/26 21:47:13 guido Exp $
+ * $Id: tcp_var.h,v 1.30 1996/02/27 15:12:53 bde Exp $
*/
#ifndef _NETINET_TCP_VAR_H_
@@ -191,10 +191,18 @@ struct rmxp_tao {
* and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the
* binary point, and is smoothed with an ALPHA of 0.75.
*/
+#ifdef notdef
#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */
#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */
#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */
#define TCP_RTTVAR_SHIFT 2 /* shift for rttvar; 2 bits */
+#else
+#define TCP_RTT_SCALE 32 /* multiplier for srtt; 3 bits frac. */
+#define TCP_RTT_SHIFT 5 /* shift for srtt; 3 bits frac. */
+#define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 2 bits */
+#define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 2 bits */
+#define TCP_DELTA_SHIFT 2 /* see tcp_input.c */
+#endif
/*
* The initial retransmission should happen at rtt + 4 * rttvar.
@@ -206,11 +214,25 @@ struct rmxp_tao {
* 1.5 tick we need. But, because the bias is
* statistical, we have to test that we don't drop below
* the minimum feasible timer (which is 2 ticks).
+#ifdef notdef
* This macro assumes that the value of TCP_RTTVAR_SCALE
* is the same as the multiplier for rttvar.
+#else
+ * This version of the macro adapted from a paper by Lawrence
+ * Brakmo and Larry Peterson which outlines a problem caused
+ * by insufficient precision in the original implementation,
+ * which results in inappropriately large RTO values for very
+ * fast networks.
+#endif
*/
+#ifdef notdef
#define TCP_REXMTVAL(tp) \
(((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar)
+#else
+#define TCP_REXMTVAL(tp) \
+ ((((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_RTTVAR_SHIFT)) \
+ + ((tp)->t_rttvar) >> TCP_RTTVAR_SHIFT))
+#endif
/* XXX
* We want to avoid doing m_pullup on incoming packets but that
OpenPOWER on IntegriCloud