summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen Hemminger <shemminger@osdl.org>2005-11-10 17:09:53 -0800
committerDavid S. Miller <davem@davemloft.net>2005-11-10 17:09:53 -0800
commit9772efb970780aeed488c19d8b4afd46c3b484af (patch)
treede016aaa29c8a95e98c7abaa70c8b590160e2886
parent7faffa1c7fb9b8e8917e3225d4e2638270c0a48b (diff)
downloadop-kernel-dev-9772efb970780aeed488c19d8b4afd46c3b484af.zip
op-kernel-dev-9772efb970780aeed488c19d8b4afd46c3b484af.tar.gz
[TCP]: Appropriate Byte Count support
This is an updated version of the RFC3465 ABC patch originally for Linux 2.6.11-rc4 by Yee-Ting Li. ABC is a way of counting bytes ack'd rather than packets when updating congestion control. The orignal ABC described in the RFC applied to a Reno style algorithm. For advanced congestion control there is little change after leaving slow start. Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/ip-sysctl.txt5
-rw-r--r--include/linux/sysctl.h1
-rw-r--r--include/linux/tcp.h1
-rw-r--r--include/net/tcp.h19
-rw-r--r--net/ipv4/sysctl_net_ipv4.c8
-rw-r--r--net/ipv4/tcp.c1
-rw-r--r--net/ipv4/tcp_cong.c31
-rw-r--r--net/ipv4/tcp_input.c7
-rw-r--r--net/ipv4/tcp_minisocks.c1
9 files changed, 63 insertions, 11 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 65895bb..ebc09a1 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER
TCP variables:
+tcp_abc - INTEGER
+ Controls Appropriate Byte Count defined in RFC3465. If set to
+ 0 then does congestion avoid once per ack. 1 is conservative
+ value, and 2 is more agressive.
+
tcp_syn_retries - INTEGER
Number of times initial SYNs for an active TCP connection attempt
will be retransmitted. Should not be higher than 255. Default value
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 22cf5e1..ab2791b 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -390,6 +390,7 @@ enum
NET_TCP_BIC_BETA=108,
NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
NET_TCP_CONG_CONTROL=110,
+ NET_TCP_ABC=111,
};
enum {
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index ac4ca44..737b32e 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -326,6 +326,7 @@ struct tcp_sock {
__u32 snd_up; /* Urgent pointer */
__u32 total_retrans; /* Total retransmits for entire connection */
+ __u32 bytes_acked; /* Appropriate Byte Counting - RFC3465 */
unsigned int keepalive_time; /* time before keep alive takes place */
unsigned int keepalive_intvl; /* time interval between keep alive probes */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 54c39988..44ba4a2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -218,6 +218,7 @@ extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor;
+extern int sysctl_tcp_abc;
extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated;
@@ -770,6 +771,23 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
*/
static inline void tcp_slow_start(struct tcp_sock *tp)
{
+ if (sysctl_tcp_abc) {
+ /* RFC3465: Slow Start
+ * TCP sender SHOULD increase cwnd by the number of
+ * previously unacknowledged bytes ACKed by each incoming
+ * acknowledgment, provided the increase is not more than L
+ */
+ if (tp->bytes_acked < tp->mss_cache)
+ return;
+
+ /* We MAY increase by 2 if discovered delayed ack */
+ if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ }
+ }
+ tp->bytes_acked = 0;
+
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++;
}
@@ -804,6 +822,7 @@ static inline void tcp_enter_cwr(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
tp->prior_ssthresh = 0;
+ tp->bytes_acked = 0;
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
__tcp_enter_cwr(sk);
tcp_set_ca_state(sk, TCP_CA_CWR);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 6526856..01444a0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
.proc_handler = &proc_tcp_congestion_control,
.strategy = &sysctl_tcp_congestion_control,
},
+ {
+ .ctl_name = NET_TCP_ABC,
+ .procname = "tcp_abc",
+ .data = &sysctl_tcp_abc,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{ .ctl_name = 0 }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 72b7c22..cfaf761 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->packets_out = 0;
tp->snd_ssthresh = 0x7fffffff;
tp->snd_cwnd_cnt = 0;
+ tp->bytes_acked = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 6d3e883..c7cc62c 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -192,17 +192,26 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
/* In "safe" area, increase. */
if (tp->snd_cwnd <= tp->snd_ssthresh)
tcp_slow_start(tp);
- else {
- /* In dangerous area, increase slowly.
- * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
- */
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- tp->snd_cwnd_cnt = 0;
- } else
- tp->snd_cwnd_cnt++;
- }
+
+ /* In dangerous area, increase slowly. */
+ else if (sysctl_tcp_abc) {
+ /* RFC3465: Apppriate Byte Count
+ * increase once for each full cwnd acked
+ */
+ if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
+ tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ }
+ } else {
+ /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
+ if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ tp->snd_cwnd_cnt = 0;
+ } else
+ tp->snd_cwnd_cnt++;
+ }
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e430656..4cb5e6f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
int sysctl_tcp_nometrics_save;
int sysctl_tcp_moderate_rcvbuf = 1;
+int sysctl_tcp_abc = 1;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -1247,6 +1248,7 @@ void tcp_enter_loss(struct sock *sk, int how)
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->bytes_acked = 0;
tcp_clear_retrans(tp);
/* Push undo marker, if it was plain RTO and nothing
@@ -1904,6 +1906,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
TCP_ECN_queue_cwr(tp);
}
+ tp->bytes_acked = 0;
tp->snd_cwnd_cnt = 0;
tcp_set_ca_state(sk, TCP_CA_Recovery);
}
@@ -2310,6 +2313,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
if (before(ack, prior_snd_una))
goto old_ack;
+ if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
+ tp->bytes_acked += ack - prior_snd_una;
+
if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
/* Window is constant, pure forward advance.
* No more checks are required.
@@ -4370,6 +4376,7 @@ discard:
EXPORT_SYMBOL(sysctl_tcp_ecn);
EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_abc);
EXPORT_SYMBOL(tcp_parse_options);
EXPORT_SYMBOL(tcp_rcv_established);
EXPORT_SYMBOL(tcp_rcv_state_process);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b1a63b2..9203a21 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
*/
newtp->snd_cwnd = 2;
newtp->snd_cwnd_cnt = 0;
+ newtp->bytes_acked = 0;
newtp->frto_counter = 0;
newtp->frto_highmark = 0;
OpenPOWER on IntegriCloud