summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- sys/netinet/siftr.c 4
-rw-r--r-- sys/netinet/tcp.h 2
-rw-r--r-- sys/netinet/tcp_input.c 4
-rw-r--r-- sys/netinet/tcp_output.c 1
-rw-r--r-- sys/netinet/tcp_subr.c 206
-rw-r--r-- sys/netinet/tcp_timer.h 3
-rw-r--r-- sys/netinet/tcp_usrreq.c 22
-rw-r--r-- sys/netinet/tcp_var.h 9
8 files changed, 16 insertions, 235 deletions
diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c
index 34f3636..6097ad4 100644
--- a/sys/netinet/siftr.c
+++ b/sys/netinet/siftr.c
@@ -193,7 +193,7 @@ struct pkt_node {
u_long snd_wnd;
/* Receive Window (bytes). */
u_long rcv_wnd;
- /* Bandwidth Controlled Window (bytes). */
+ /* Unused (was: Bandwidth Controlled Window (bytes)). */
u_long snd_bwnd;
/* Slow Start Threshold (bytes). */
u_long snd_ssthresh;
@@ -775,7 +775,7 @@ siftr_siftdata(struct pkt_node *pn, struct inpcb *inp, struct tcpcb *tp,
pn->snd_cwnd = tp->snd_cwnd;
pn->snd_wnd = tp->snd_wnd;
pn->rcv_wnd = tp->rcv_wnd;
- pn->snd_bwnd = tp->snd_bwnd;
+ pn->snd_bwnd = 0; /* Unused, kept for compat. */
pn->snd_ssthresh = tp->snd_ssthresh;
pn->snd_scale = tp->snd_scale;
pn->rcv_scale = tp->rcv_scale;
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index 27d45aa..62a89f7 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -221,7 +221,7 @@ struct tcp_info {
/* FreeBSD extensions to tcp_info. */
u_int32_t tcpi_snd_wnd; /* Advertised send window. */
- u_int32_t tcpi_snd_bwnd; /* Bandwidth send window. */
+ u_int32_t tcpi_snd_bwnd; /* No longer used. */
u_int32_t tcpi_snd_nxt; /* Next egress seqno */
u_int32_t tcpi_rcv_nxt; /* Next ingress seqno */
u_int32_t tcpi_toe_tid; /* HWTID for TOE endpoints */
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index e4bddb9..22a2ea4 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -1321,7 +1321,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_xmit_timer(tp,
ticks - tp->t_rtttime);
}
- tcp_xmit_bandwidth_limit(tp, th->th_ack);
acked = th->th_ack - tp->snd_una;
TCPSTAT_INC(tcps_rcvackpack);
TCPSTAT_ADD(tcps_rcvackbyte, acked);
@@ -2278,7 +2277,6 @@ process_ACK:
tp->t_rttlow = ticks - tp->t_rtttime;
tcp_xmit_timer(tp, ticks - tp->t_rtttime);
}
- tcp_xmit_bandwidth_limit(tp, th->th_ack);
/*
* If all outstanding data is acked, stop retransmit
@@ -3328,8 +3326,6 @@ tcp_mss(struct tcpcb *tp, int offer)
tp->snd_ssthresh = max(2 * mss, metrics.rmx_ssthresh);
TCPSTAT_INC(tcps_usedssthresh);
}
- if (metrics.rmx_bandwidth)
- tp->snd_bandwidth = metrics.rmx_bandwidth;
/*
* Set the slow-start flight size depending on whether this
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 50d0ee6..94b48fc 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -225,7 +225,6 @@ again:
tso = 0;
off = tp->snd_nxt - tp->snd_una;
sendwin = min(tp->snd_wnd, tp->snd_cwnd);
- sendwin = min(sendwin, tp->snd_bwnd);
flags = tcp_outflags[tp->t_state];
/*
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index b537fb9..d19a91a 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -160,14 +160,6 @@ SYSCTL_VNET_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
"Default TCP Maximum Segment Size for IPv6");
#endif
-static int
-vnet_sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
-{
-
- VNET_SYSCTL_ARG(req, arg1);
- return (sysctl_msec_to_ticks(oidp, arg1, arg2, req));
-}
-
/*
* Minimum MSS we accept and use. This prevents DoS attacks where
* we are forced to a ridiculous low MSS like 20 and send hundreds
@@ -213,50 +205,6 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW,
&VNET_NAME(tcp_isn_reseed_interval), 0,
"Seconds between reseeding of ISN secret");
-/*
- * TCP bandwidth limiting sysctls. Note that the default lower bound of
- * 1024 exists only for debugging. A good production default would be
- * something like 6100.
- */
-SYSCTL_NODE(_net_inet_tcp, OID_AUTO, inflight, CTLFLAG_RW, 0,
- "TCP inflight data limiting");
-
-static VNET_DEFINE(int, tcp_inflight_enable) = 0;
-#define V_tcp_inflight_enable VNET(tcp_inflight_enable)
-SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, enable, CTLFLAG_RW,
- &VNET_NAME(tcp_inflight_enable), 0,
- "Enable automatic TCP inflight data limiting");
-
-static int tcp_inflight_debug = 0;
-SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, debug, CTLFLAG_RW,
- &tcp_inflight_debug, 0,
- "Debug TCP inflight calculations");
-
-static VNET_DEFINE(int, tcp_inflight_rttthresh);
-#define V_tcp_inflight_rttthresh VNET(tcp_inflight_rttthresh)
-SYSCTL_VNET_PROC(_net_inet_tcp_inflight, OID_AUTO, rttthresh,
- CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_inflight_rttthresh), 0,
- vnet_sysctl_msec_to_ticks, "I",
- "RTT threshold below which inflight will deactivate itself");
-
-static VNET_DEFINE(int, tcp_inflight_min) = 6144;
-#define V_tcp_inflight_min VNET(tcp_inflight_min)
-SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, min, CTLFLAG_RW,
- &VNET_NAME(tcp_inflight_min), 0,
- "Lower-bound for TCP inflight window");
-
-static VNET_DEFINE(int, tcp_inflight_max) = TCP_MAXWIN << TCP_MAX_WINSHIFT;
-#define V_tcp_inflight_max VNET(tcp_inflight_max)
-SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, max, CTLFLAG_RW,
- &VNET_NAME(tcp_inflight_max), 0,
- "Upper-bound for TCP inflight window");
-
-static VNET_DEFINE(int, tcp_inflight_stab) = 20;
-#define V_tcp_inflight_stab VNET(tcp_inflight_stab)
-SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, stab, CTLFLAG_RW,
- &VNET_NAME(tcp_inflight_stab), 0,
- "Inflight Algorithm Stabilization 20 = 2 packets");
-
#ifdef TCP_SORECEIVE_STREAM
static int tcp_soreceive_stream = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN,
@@ -338,8 +286,6 @@ tcp_init(void)
in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
"tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE);
- V_tcp_inflight_rttthresh = TCPTV_INFLIGHT_RTTTHRESH;
-
/*
* These have to be type stable for the benefit of the timers.
*/
@@ -728,10 +674,8 @@ tcp_newtcpcb(struct inpcb *inp)
tp->t_rttmin = tcp_rexmit_min;
tp->t_rxtcur = TCPTV_RTOBASE;
tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
- tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
tp->t_rcvtime = ticks;
- tp->t_bw_rtttime = ticks;
/*
* IPv4 TTL initialization is necessary for an IPv6 socket as well,
* because the socket may be bound to an IPv6 wildcard address,
@@ -849,8 +793,6 @@ tcp_discardcb(struct tcpcb *tp)
metrics.rmx_rtt = tp->t_srtt;
metrics.rmx_rttvar = tp->t_rttvar;
- /* XXX: This wraps if the pipe is more than 4 Gbit per second */
- metrics.rmx_bandwidth = tp->snd_bandwidth;
metrics.rmx_cwnd = tp->snd_cwnd;
metrics.rmx_sendpipe = 0;
metrics.rmx_recvpipe = 0;
@@ -1773,154 +1715,6 @@ ipsec_hdrsiz_tcp(struct tcpcb *tp)
}
#endif /* IPSEC */
-/*
- * TCP BANDWIDTH DELAY PRODUCT WINDOW LIMITING
- *
- * This code attempts to calculate the bandwidth-delay product as a
- * means of determining the optimal window size to maximize bandwidth,
- * minimize RTT, and avoid the over-allocation of buffers on interfaces and
- * routers. This code also does a fairly good job keeping RTTs in check
- * across slow links like modems. We implement an algorithm which is very
- * similar (but not meant to be) TCP/Vegas. The code operates on the
- * transmitter side of a TCP connection and so only effects the transmit
- * side of the connection.
- *
- * BACKGROUND: TCP makes no provision for the management of buffer space
- * at the end points or at the intermediate routers and switches. A TCP
- * stream, whether using NewReno or not, will eventually buffer as
- * many packets as it is able and the only reason this typically works is
- * due to the fairly small default buffers made available for a connection
- * (typicaly 16K or 32K). As machines use larger windows and/or window
- * scaling it is now fairly easy for even a single TCP connection to blow-out
- * all available buffer space not only on the local interface, but on
- * intermediate routers and switches as well. NewReno makes a misguided
- * attempt to 'solve' this problem by waiting for an actual failure to occur,
- * then backing off, then steadily increasing the window again until another
- * failure occurs, ad-infinitum. This results in terrible oscillation that
- * is only made worse as network loads increase and the idea of intentionally
- * blowing out network buffers is, frankly, a terrible way to manage network
- * resources.
- *
- * It is far better to limit the transmit window prior to the failure
- * condition being achieved. There are two general ways to do this: First
- * you can 'scan' through different transmit window sizes and locate the
- * point where the RTT stops increasing, indicating that you have filled the
- * pipe, then scan backwards until you note that RTT stops decreasing, then
- * repeat ad-infinitum. This method works in principle but has severe
- * implementation issues due to RTT variances, timer granularity, and
- * instability in the algorithm which can lead to many false positives and
- * create oscillations as well as interact badly with other TCP streams
- * implementing the same algorithm.
- *
- * The second method is to limit the window to the bandwidth delay product
- * of the link. This is the method we implement. RTT variances and our
- * own manipulation of the congestion window, bwnd, can potentially
- * destabilize the algorithm. For this reason we have to stabilize the
- * elements used to calculate the window. We do this by using the minimum
- * observed RTT, the long term average of the observed bandwidth, and
- * by adding two segments worth of slop. It isn't perfect but it is able
- * to react to changing conditions and gives us a very stable basis on
- * which to extend the algorithm.
- */
-void
-tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq)
-{
- u_long bw;
- u_long bwnd;
- int save_ticks;
-
- INP_WLOCK_ASSERT(tp->t_inpcb);
-
- /*
- * If inflight_enable is disabled in the middle of a tcp connection,
- * make sure snd_bwnd is effectively disabled.
- */
- if (V_tcp_inflight_enable == 0 ||
- tp->t_rttlow < V_tcp_inflight_rttthresh) {
- tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
- tp->snd_bandwidth = 0;
- return;
- }
-
- /*
- * Figure out the bandwidth. Due to the tick granularity this
- * is a very rough number and it MUST be averaged over a fairly
- * long period of time. XXX we need to take into account a link
- * that is not using all available bandwidth, but for now our
- * slop will ramp us up if this case occurs and the bandwidth later
- * increases.
- *
- * Note: if ticks rollover 'bw' may wind up negative. We must
- * effectively reset t_bw_rtttime for this case.
- */
- save_ticks = ticks;
- if ((u_int)(save_ticks - tp->t_bw_rtttime) < 1)
- return;
-
- bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz /
- (save_ticks - tp->t_bw_rtttime);
- tp->t_bw_rtttime = save_ticks;
- tp->t_bw_rtseq = ack_seq;
- if (tp->t_bw_rtttime == 0 || (int)bw < 0)
- return;
- bw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4;
-
- tp->snd_bandwidth = bw;
-
- /*
- * Calculate the semi-static bandwidth delay product, plus two maximal
- * segments. The additional slop puts us squarely in the sweet
- * spot and also handles the bandwidth run-up case and stabilization.
- * Without the slop we could be locking ourselves into a lower
- * bandwidth.
- *
- * Situations Handled:
- * (1) Prevents over-queueing of packets on LANs, especially on
- * high speed LANs, allowing larger TCP buffers to be
- * specified, and also does a good job preventing
- * over-queueing of packets over choke points like modems
- * (at least for the transmit side).
- *
- * (2) Is able to handle changing network loads (bandwidth
- * drops so bwnd drops, bandwidth increases so bwnd
- * increases).
- *
- * (3) Theoretically should stabilize in the face of multiple
- * connections implementing the same algorithm (this may need
- * a little work).
- *
- * (4) Stability value (defaults to 20 = 2 maximal packets) can
- * be adjusted with a sysctl but typically only needs to be
- * on very slow connections. A value no smaller then 5
- * should be used, but only reduce this default if you have
- * no other choice.
- */
-#define USERTT ((tp->t_srtt + tp->t_rttbest) / 2)
- bwnd = (int64_t)bw * USERTT / (hz << TCP_RTT_SHIFT) + V_tcp_inflight_stab * tp->t_maxseg / 10;
-#undef USERTT
-
- if (tcp_inflight_debug > 0) {
- static int ltime;
- if ((u_int)(ticks - ltime) >= hz / tcp_inflight_debug) {
- ltime = ticks;
- printf("%p bw %ld rttbest %d srtt %d bwnd %ld\n",
- tp,
- bw,
- tp->t_rttbest,
- tp->t_srtt,
- bwnd
- );
- }
- }
- if ((long)bwnd < V_tcp_inflight_min)
- bwnd = V_tcp_inflight_min;
- if (bwnd > V_tcp_inflight_max)
- bwnd = V_tcp_inflight_max;
- if ((long)bwnd < tp->t_maxseg * 2)
- bwnd = tp->t_maxseg * 2;
- tp->snd_bwnd = bwnd;
-}
-
#ifdef TCP_SIGNATURE
/*
* Callback function invoked by m_apply() to digest TCP segment data
diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h
index 1ab0b7b..4bfcdf6 100644
--- a/sys/netinet/tcp_timer.h
+++ b/sys/netinet/tcp_timer.h
@@ -86,9 +86,6 @@
#define TCPTV_KEEPINTVL ( 75*hz) /* default probe interval */
#define TCPTV_KEEPCNT 8 /* max probes before drop */
-#define TCPTV_INFLIGHT_RTTTHRESH (10*hz/1000) /* below which inflight
- disengages, in msec */
-
#define TCPTV_FINWAIT2_TIMEOUT (60*hz) /* FIN_WAIT_2 timeout if no receiver */
/*
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 2e61c31..f35890b 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -1105,7 +1105,6 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
tp->t_state = TCPS_SYN_SENT;
tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
tp->iss = tcp_new_isn(tp);
- tp->t_bw_rtseq = tp->iss;
tcp_sendseqinit(tp);
return 0;
@@ -1168,7 +1167,6 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
tp->t_state = TCPS_SYN_SENT;
tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
tp->iss = tcp_new_isn(tp);
- tp->t_bw_rtseq = tp->iss;
tcp_sendseqinit(tp);
return 0;
@@ -1214,7 +1212,7 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
ti->tcpi_rcv_space = tp->rcv_wnd;
ti->tcpi_rcv_nxt = tp->rcv_nxt;
ti->tcpi_snd_wnd = tp->snd_wnd;
- ti->tcpi_snd_bwnd = tp->snd_bwnd;
+ ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */
ti->tcpi_snd_nxt = tp->snd_nxt;
ti->tcpi_snd_mss = tp->t_maxseg;
ti->tcpi_rcv_mss = tp->t_maxseg;
@@ -1795,26 +1793,24 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
db_print_indent(indent);
- db_printf("snd_wnd: %lu snd_cwnd: %lu snd_bwnd: %lu\n",
- tp->snd_wnd, tp->snd_cwnd, tp->snd_bwnd);
+ db_printf("snd_wnd: %lu snd_cwnd: %lu\n",
+ tp->snd_wnd, tp->snd_cwnd);
db_print_indent(indent);
- db_printf("snd_ssthresh: %lu snd_bandwidth: %lu snd_recover: "
- "0x%08x\n", tp->snd_ssthresh, tp->snd_bandwidth,
- tp->snd_recover);
+ db_printf("snd_ssthresh: %lu snd_recover: "
+ "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
db_print_indent(indent);
db_printf("t_maxopd: %u t_rcvtime: %u t_startime: %u\n",
tp->t_maxopd, tp->t_rcvtime, tp->t_starttime);
db_print_indent(indent);
- db_printf("t_rttime: %u t_rtsq: 0x%08x t_bw_rtttime: %u\n",
- tp->t_rtttime, tp->t_rtseq, tp->t_bw_rtttime);
+ db_printf("t_rttime: %u t_rtsq: 0x%08x\n",
+ tp->t_rtttime, tp->t_rtseq);
db_print_indent(indent);
- db_printf("t_bw_rtseq: 0x%08x t_rxtcur: %d t_maxseg: %u "
- "t_srtt: %d\n", tp->t_bw_rtseq, tp->t_rxtcur, tp->t_maxseg,
- tp->t_srtt);
+ db_printf("t_rxtcur: %d t_maxseg: %u t_srtt: %d\n",
+ tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
db_print_indent(indent);
db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u "
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index b753e10..2b7abca 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -135,12 +135,12 @@ struct tcpcb {
u_long snd_wnd; /* send window */
u_long snd_cwnd; /* congestion-controlled window */
- u_long snd_bwnd; /* bandwidth-controlled window */
+ u_long snd_spare1; /* unused */
u_long snd_ssthresh; /* snd_cwnd size threshold for
* for slow start exponential to
* linear switch
*/
- u_long snd_bandwidth; /* calculated bandwidth or 0 */
+ u_long snd_spare2; /* unused */
tcp_seq snd_recover; /* for use in NewReno Fast Recovery */
u_int t_maxopd; /* mss plus options */
@@ -150,8 +150,8 @@ struct tcpcb {
u_int t_rtttime; /* RTT measurement start time */
tcp_seq t_rtseq; /* sequence number being timed */
- u_int t_bw_rtttime; /* used for bandwidth calculation */
- tcp_seq t_bw_rtseq; /* used for bandwidth calculation */
+ u_int t_bw_spare1; /* unused */
+ tcp_seq t_bw_spare2; /* unused */
int t_rxtcur; /* current retransmit value (ticks) */
u_int t_maxseg; /* maximum segment size */
@@ -654,7 +654,6 @@ void tcpip_fillheaders(struct inpcb *, void *, void *);
void tcp_timer_activate(struct tcpcb *, int, u_int);
int tcp_timer_active(struct tcpcb *, int);
void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
-void tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq);
/*
* All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
*/
OpenPOWER on IntegriCloud