summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorAndreas Petlund <apetlund@simula.no>2010-02-18 02:47:01 +0000
committerDavid S. Miller <davem@davemloft.net>2010-02-18 15:43:08 -0800
commit36e31b0af58728071e8023cf8e20c5166b700717 (patch)
tree8b4d251bf78965ac7501bea9011786b8255a3312 /net
parent5aa4b32fc86408705337e941ed716880c63d1590 (diff)
downloadop-kernel-dev-36e31b0af58728071e8023cf8e20c5166b700717.zip
op-kernel-dev-36e31b0af58728071e8023cf8e20c5166b700717.tar.gz
net: TCP thin linear timeouts
This patch will make TCP use only linear timeouts if the stream is thin. This will help to avoid the very high latencies that thin stream suffer because of exponential backoff. This mechanism is only active if enabled by iocontrol or syscontrol and the stream is identified as thin. A maximum of 6 linear timeouts is tried before exponential backoff is resumed. Signed-off-by: Andreas Petlund <apetlund@simula.no> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/sysctl_net_ipv4.c7
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--net/ipv4/tcp_timer.c21
3 files changed, 34 insertions, 1 deletions
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7e3712c..e6a2460 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -576,6 +576,13 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "tcp_thin_linear_timeouts",
+ .data = &sysctl_tcp_thin_linear_timeouts,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "udp_mem",
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e471d03..21bae9a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2229,6 +2229,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
}
break;
+ case TCP_THIN_LINEAR_TIMEOUTS:
+ if (val < 0 || val > 1)
+ err = -EINVAL;
+ else
+ tp->thin_lto = val;
+ break;
+
case TCP_CORK:
/* When set indicates to always queue non-full frames.
* Later the user clears this option and we transmit
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index de7d1bf..a17629b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -29,6 +29,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
int sysctl_tcp_orphan_retries __read_mostly;
+int sysctl_tcp_thin_linear_timeouts __read_mostly;
static void tcp_write_timer(unsigned long);
static void tcp_delack_timer(unsigned long);
@@ -415,7 +416,25 @@ void tcp_retransmit_timer(struct sock *sk)
icsk->icsk_retransmits++;
out_reset_timer:
- icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+ /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
+ * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
+ * might be increased if the stream oscillates between thin and thick,
+ * thus the old value might already be too high compared to the value
+ * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
+ * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
+ * exponential backoff behaviour to avoid continue hammering
+ * linear-timeout retransmissions into a black hole
+ */
+ if (sk->sk_state == TCP_ESTABLISHED &&
+ (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
+ tcp_stream_is_thin(tp) &&
+ icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
+ icsk->icsk_backoff = 0;
+ icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
+ } else {
+ /* Use normal (exponential) backoff */
+ icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+ }
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
__sk_dst_reset(sk);
OpenPOWER on IntegriCloud