summaryrefslogtreecommitdiffstats
path: root/sys/netinet
diff options
context:
space:
mode:
author: lstewart <lstewart@FreeBSD.org> 2009-01-15 06:44:22 +0000
committer: lstewart <lstewart@FreeBSD.org> 2009-01-15 06:44:22 +0000
commit: d5deb43d0fd1476cec4cf2c44126170317fbe5fe (patch)
tree: ca77baf06d77eccefad07a0322a1f064ec3ea6d3 /sys/netinet
parent: 8e6c149fe3004d4eb2e0a18b523e6a135b498b5f (diff)
download: FreeBSD-src-d5deb43d0fd1476cec4cf2c44126170317fbe5fe.zip
download: FreeBSD-src-d5deb43d0fd1476cec4cf2c44126170317fbe5fe.tar.gz
Add TCP Appropriate Byte Counting (RFC 3465) support to kernel.
The new behaviour is on by default and can be disabled by setting the net.inet.tcp.rfc3465 sysctl to 0 to restore the previous behaviour.

The patch changes struct tcpcb in sys/netinet/tcp_var.h, which breaks the ABI. Bump __FreeBSD_version to 800061 accordingly. User space tools that rely on the size of struct tcpcb (e.g. sockstat) need to be recompiled.

Reviewed by: rpaulo, gnn
Approved by: gnn, kmacy (mentors)
Sponsored by: FreeBSD Foundation
Diffstat (limited to 'sys/netinet')
-rw-r--r--  sys/netinet/tcp_input.c  70
-rw-r--r--  sys/netinet/tcp_subr.c    2
-rw-r--r--  sys/netinet/tcp_timer.c   1
-rw-r--r--  sys/netinet/tcp_var.h     1
-rw-r--r--  sys/netinet/vinet.h       4
5 files changed, 68 insertions, 10 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 30b3fde..d3b91b6 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -117,6 +117,8 @@ int tcp_insecure_rst;
int tcp_do_autorcvbuf;
int tcp_autorcvbuf_inc;
int tcp_autorcvbuf_max;
+int tcp_do_rfc3465;
+int tcp_abc_l_var;
#endif
SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_tcp, TCPCTL_STATS, stats,
@@ -144,6 +146,13 @@ SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW,
tcp_do_rfc3390, 0,
"Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW,
+ tcp_do_rfc3465, 0,
+ "Enable RFC 3465 (Appropriate Byte Counting)");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_RW,
+ tcp_abc_l_var, 2,
+ "Cap the max cwnd increment during slow-start to this number of segments");
+
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN");
SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_ecn, OID_AUTO, enable,
CTLFLAG_RW, tcp_do_ecn, 0, "TCP ECN support");
@@ -2293,20 +2302,59 @@ process_ACK:
/*
* When new data is acked, open the congestion window.
- * If the window gives us less than ssthresh packets
- * in flight, open exponentially (maxseg per packet).
- * Otherwise open linearly: maxseg per window
- * (maxseg^2 / cwnd per packet).
- * If cwnd > maxseg^2, fix the cwnd increment at 1 byte
- * to avoid capping cwnd (as suggested in RFC2581).
+ * Method depends on which congestion control state we're
+ * in (slow start or cong avoid) and if ABC (RFC 3465) is
+ * enabled.
+ *
+ * slow start: cwnd <= ssthresh
+ * cong avoid: cwnd > ssthresh
+ *
+ * slow start and ABC (RFC 3465):
+ * Grow cwnd exponentially by the amount of data
+ * ACKed capping the max increment per ACK to
+ * (abc_l_var * maxseg) bytes.
+ *
+ * slow start without ABC (RFC 2581):
+ * Grow cwnd exponentially by maxseg per ACK.
+ *
+ * cong avoid and ABC (RFC 3465):
+ * Grow cwnd linearly by maxseg per RTT for each
+ * cwnd worth of ACKed data.
+ *
+ * cong avoid without ABC (RFC 2581):
+ * Grow cwnd linearly by approximately maxseg per RTT using
+ * maxseg^2 / cwnd per ACK as the increment.
+ * If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
+ * avoid capping cwnd.
*/
if ((!V_tcp_do_newreno && !(tp->t_flags & TF_SACK_PERMIT)) ||
!IN_FASTRECOVERY(tp)) {
u_int cw = tp->snd_cwnd;
u_int incr = tp->t_maxseg;
- if (cw > tp->snd_ssthresh)
- incr = max((incr * incr / cw), 1);
- tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
+ /* In congestion avoidance? */
+ if (cw > tp->snd_ssthresh) {
+ if (V_tcp_do_rfc3465) {
+ tp->t_bytes_acked += acked;
+ if (tp->t_bytes_acked >= tp->snd_cwnd)
+ tp->t_bytes_acked -= cw;
+ else
+ incr = 0;
+ }
+ else
+ incr = max((incr * incr / cw), 1);
+ /*
+ * In slow-start with ABC enabled and no RTO in sight?
+ * (Must not use abc_l_var > 1 if slow starting after an
+ * RTO. On RTO, snd_nxt = snd_una, so the snd_nxt ==
+ * snd_max check is sufficient to handle this).
+ */
+ } else if (V_tcp_do_rfc3465 &&
+ tp->snd_nxt == tp->snd_max)
+ incr = min(acked,
+ V_tcp_abc_l_var * tp->t_maxseg);
+ /* ABC is on by default, so (incr == 0) frequently. */
+ if (incr > 0)
+ tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
}
SOCKBUF_LOCK(&so->so_snd);
if (acked > so->so_snd.sb_cc) {
@@ -2328,8 +2376,10 @@ process_ACK:
tp->snd_recover = th->th_ack - 1;
if ((V_tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) &&
IN_FASTRECOVERY(tp) &&
- SEQ_GEQ(th->th_ack, tp->snd_recover))
+ SEQ_GEQ(th->th_ack, tp->snd_recover)) {
EXIT_FASTRECOVERY(tp);
+ tp->t_bytes_acked = 0;
+ }
tp->snd_una = th->th_ack;
if (tp->t_flags & TF_SACK_PERMIT) {
if (SEQ_GT(tp->snd_una, tp->snd_recover))
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 9cb941a..53fc882 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -316,6 +316,8 @@ tcp_init(void)
V_tcp_do_autorcvbuf = 1;
V_tcp_autorcvbuf_inc = 16*1024;
V_tcp_autorcvbuf_max = 256*1024;
+ V_tcp_do_rfc3465 = 1;
+ V_tcp_abc_l_var = 2;
V_tcp_mssdflt = TCP_MSS;
#ifdef INET6
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index acce92f..6963d9c 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -587,6 +587,7 @@ tcp_timer_rexmt(void * xtp)
tp->t_dupacks = 0;
}
EXIT_FASTRECOVERY(tp);
+ tp->t_bytes_acked = 0;
(void) tcp_output(tp);
out:
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index a4392cb..306514a 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -189,6 +189,7 @@ struct tcpcb {
void *t_pspare[3]; /* toe usrreqs / toepcb * / congestion algo / vimage / 1 general use */
struct toe_usrreqs *t_tu; /* offload operations vector */
void *t_toe; /* TOE pcb pointer */
+ int t_bytes_acked; /* # bytes acked during current RTT */
};
/*
diff --git a/sys/netinet/vinet.h b/sys/netinet/vinet.h
index 449334e..618afaa 100644
--- a/sys/netinet/vinet.h
+++ b/sys/netinet/vinet.h
@@ -127,6 +127,8 @@ struct vnet_inet {
int _drop_synfin;
int _tcp_do_rfc3042;
int _tcp_do_rfc3390;
+ int _tcp_do_rfc3465;
+ int _tcp_abc_l_var;
int _tcp_do_ecn;
int _tcp_ecn_maxretries;
int _tcp_insecure_rst;
@@ -291,6 +293,7 @@ extern struct vnet_inet vnet_inet_0;
#define V_subnetsarelocal VNET_INET(subnetsarelocal)
#define V_tcb VNET_INET(tcb)
#define V_tcbinfo VNET_INET(tcbinfo)
+#define V_tcp_abc_l_var VNET_INET(tcp_abc_l_var)
#define V_tcp_autorcvbuf_inc VNET_INET(tcp_autorcvbuf_inc)
#define V_tcp_autorcvbuf_max VNET_INET(tcp_autorcvbuf_max)
#define V_tcp_autosndbuf_inc VNET_INET(tcp_autosndbuf_inc)
@@ -303,6 +306,7 @@ extern struct vnet_inet vnet_inet_0;
#define V_tcp_do_rfc1323 VNET_INET(tcp_do_rfc1323)
#define V_tcp_do_rfc3042 VNET_INET(tcp_do_rfc3042)
#define V_tcp_do_rfc3390 VNET_INET(tcp_do_rfc3390)
+#define V_tcp_do_rfc3465 VNET_INET(tcp_do_rfc3465)
#define V_tcp_do_sack VNET_INET(tcp_do_sack)
#define V_tcp_do_tso VNET_INET(tcp_do_tso)
#define V_tcp_ecn_maxretries VNET_INET(tcp_ecn_maxretries)
OpenPOWER on IntegriCloud